Changes to the btree and pager that reduce the amount of I/O when dealing
with the freelist.  (1) Avoid journaling pages of a table that is being
deleted.  (2) Do not read the original content of pages being pulled off
of the freelist. (CVS 3671)

FossilOrigin-Name: 2ba5be311945a4c15b6dce7c01efefb513b9a973
This commit is contained in:
drh 2007-03-04 13:15:27 +00:00
parent 8efe541f24
commit 0787db6519
5 changed files with 46 additions and 41 deletions

View File

@ -1,5 +1,5 @@
C Handle\sthe\scase\swhere\sthe\sestimated\scost\sof\sa\svirtual\stable\sscan\sis\slarger\sthan\sSQLITE_BIG_DBL.\sTicket\s#2253.\s(CVS\s3670) C Changes\sto\sthe\sbtree\sand\spager\sthat\sreduce\sthe\samount\sof\sI/O\swhen\sdealing\nwith\sthe\sfreelist.\s\s(1)\sAvoid\sjournaling\spages\sof\sa\stable\sthat\sis\sbeing\ndeleted.\s\s(2)\sDo\snot\sread\sthe\soriginal\scontent\sof\spages\sbeing\spulled\soff\nof\sthe\sfreelist.\s(CVS\s3671)
D 2007-03-02T08:12:22 D 2007-03-04T13:15:28
F Makefile.in 1fe3d0b46e40fd684e1e61f8e8056cefed16de9f F Makefile.in 1fe3d0b46e40fd684e1e61f8e8056cefed16de9f
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935 F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028 F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -57,7 +57,7 @@ F src/alter.c 2c79ec40f65e33deaf90ca493422c74586e481a3
F src/analyze.c 7d2b7ab9a9c2fd6e55700f69064dfdd3e36d7a8a F src/analyze.c 7d2b7ab9a9c2fd6e55700f69064dfdd3e36d7a8a
F src/attach.c b11eb4d5d3fb99a10a626956bccc7215f6b68b16 F src/attach.c b11eb4d5d3fb99a10a626956bccc7215f6b68b16
F src/auth.c 902f4722661c796b97f007d9606bd7529c02597f F src/auth.c 902f4722661c796b97f007d9606bd7529c02597f
F src/btree.c 4d4bef16fbf4f53ec3b161cfe5bb19bbc27a281d F src/btree.c 866536a3c667de942747ee3fd3914b31bb273fee
F src/btree.h 066444ee25bd6e6accb997bfd2cf5ace14dbcd00 F src/btree.h 066444ee25bd6e6accb997bfd2cf5ace14dbcd00
F src/build.c 6bd68dc730b01c1727738f8e4b5c730eb0ddb421 F src/build.c 6bd68dc730b01c1727738f8e4b5c730eb0ddb421
F src/callback.c 31d22b4919c7645cbcbb1591ce2453e8c677c558 F src/callback.c 31d22b4919c7645cbcbb1591ce2453e8c677c558
@ -85,8 +85,8 @@ F src/os_unix.c abdb0f7b8e3f078b8b48d4c0b8c801693046774d
F src/os_unix.h 5768d56d28240d3fe4537fac08cc85e4fb52279e F src/os_unix.h 5768d56d28240d3fe4537fac08cc85e4fb52279e
F src/os_win.c 8736cf3a49fd651a6538857480f302807d57814c F src/os_win.c 8736cf3a49fd651a6538857480f302807d57814c
F src/os_win.h 41a946bea10f61c158ce8645e7646b29d44f122b F src/os_win.h 41a946bea10f61c158ce8645e7646b29d44f122b
F src/pager.c 5d3a127c93489c93e59dfbb1be2b29e39e135f68 F src/pager.c c78d1cc1a02d9c6ab3263c3ca757e4466973fa2a
F src/pager.h 2e6d42f4ae004ae748a037b8468112b851c447a7 F src/pager.h 8881591ca23d1e5fd83c95fa8317245fbcf64227
F src/parse.y bcfe366c1fd61cfc40e5344eb69a31997a821af0 F src/parse.y bcfe366c1fd61cfc40e5344eb69a31997a821af0
F src/pragma.c 5091300911670ddaa552bfa12c45cbca1bb7e7d6 F src/pragma.c 5091300911670ddaa552bfa12c45cbca1bb7e7d6
F src/prepare.c 484389c6811415b8f23d259ac9c029613e1c72c3 F src/prepare.c 484389c6811415b8f23d259ac9c029613e1c72c3
@ -435,7 +435,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0 F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513 F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
P ddb4d0af5770c7030fe6e92119972c9508724b9a P 52885ed8b76a06588acf202a38b4feabfca1cfd1
R 35703e5b3fc1d8c9b1249f60db5a29c1 R 35bc70c2ff87b67600b039941003bc67
U danielk1977 U drh
Z cc77ee9ae5f32f8281d9b31d7adeb8f2 Z a8523f47620c390e8f111fe930472ba8

View File

@ -1 +1 @@
52885ed8b76a06588acf202a38b4feabfca1cfd1 2ba5be311945a4c15b6dce7c01efefb513b9a973

View File

@ -9,7 +9,7 @@
** May you share freely, never taking more than you give. ** May you share freely, never taking more than you give.
** **
************************************************************************* *************************************************************************
** $Id: btree.c,v 1.335 2007/02/10 19:22:36 drh Exp $ ** $Id: btree.c,v 1.336 2007/03/04 13:15:28 drh Exp $
** **
** This file implements an external (disk-based) database using BTrees. ** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to ** For a detailed discussion of BTrees, refer to
@ -1382,11 +1382,11 @@ static void zeroPage(MemPage *pPage, int flags){
** Get a page from the pager. Initialize the MemPage.pBt and ** Get a page from the pager. Initialize the MemPage.pBt and
** MemPage.aData elements if needed. ** MemPage.aData elements if needed.
*/ */
static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage){ static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage, int clrFlag){
int rc; int rc;
unsigned char *aData; unsigned char *aData;
MemPage *pPage; MemPage *pPage;
rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData); rc = sqlite3pager_acquire(pBt->pPager, pgno, (void**)&aData, clrFlag);
if( rc ) return rc; if( rc ) return rc;
pPage = (MemPage*)&aData[pBt->pageSize]; pPage = (MemPage*)&aData[pBt->pageSize];
pPage->aData = aData; pPage->aData = aData;
@ -1412,7 +1412,7 @@ static int getAndInitPage(
if( pgno==0 ){ if( pgno==0 ){
return SQLITE_CORRUPT_BKPT; return SQLITE_CORRUPT_BKPT;
} }
rc = getPage(pBt, pgno, ppPage); rc = getPage(pBt, pgno, ppPage, 0);
if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){ if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
rc = initPage(*ppPage, pParent); rc = initPage(*ppPage, pParent);
} }
@ -1834,7 +1834,7 @@ static int lockBtree(BtShared *pBt){
int rc, pageSize; int rc, pageSize;
MemPage *pPage1; MemPage *pPage1;
if( pBt->pPage1 ) return SQLITE_OK; if( pBt->pPage1 ) return SQLITE_OK;
rc = getPage(pBt, 1, &pPage1); rc = getPage(pBt, 1, &pPage1, 0);
if( rc!=SQLITE_OK ) return rc; if( rc!=SQLITE_OK ) return rc;
@ -2237,7 +2237,7 @@ static int relocatePage(
** iPtrPage. ** iPtrPage.
*/ */
if( eType!=PTRMAP_ROOTPAGE ){ if( eType!=PTRMAP_ROOTPAGE ){
rc = getPage(pBt, iPtrPage, &pPtrPage); rc = getPage(pBt, iPtrPage, &pPtrPage, 0);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
@ -2341,7 +2341,7 @@ static int autoVacuumCommit(BtShared *pBt, Pgno *nTrunc){
if( eType==PTRMAP_FREEPAGE ){ if( eType==PTRMAP_FREEPAGE ){
continue; continue;
} }
rc = getPage(pBt, iDbPage, &pDbMemPage); rc = getPage(pBt, iDbPage, &pDbMemPage, 0);
if( rc!=SQLITE_OK ) goto autovacuum_out; if( rc!=SQLITE_OK ) goto autovacuum_out;
/* Find the next page in the free-list that is not already at the end /* Find the next page in the free-list that is not already at the end
@ -2525,7 +2525,7 @@ int sqlite3BtreeRollback(Btree *p){
/* The rollback may have destroyed the pPage1->aData value. So /* The rollback may have destroyed the pPage1->aData value. So
** call getPage() on page 1 again to make sure pPage1->aData is ** call getPage() on page 1 again to make sure pPage1->aData is
** set correctly. */ ** set correctly. */
if( getPage(pBt, 1, &pPage1)==SQLITE_OK ){ if( getPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
releasePage(pPage1); releasePage(pPage1);
} }
assert( countWriteCursors(pBt)==0 ); assert( countWriteCursors(pBt)==0 );
@ -3618,7 +3618,7 @@ static int allocatePage(
}else{ }else{
iTrunk = get4byte(&pPage1->aData[32]); iTrunk = get4byte(&pPage1->aData[32]);
} }
rc = getPage(pBt, iTrunk, &pTrunk); rc = getPage(pBt, iTrunk, &pTrunk, 0);
if( rc ){ if( rc ){
pTrunk = 0; pTrunk = 0;
goto end_allocate_page; goto end_allocate_page;
@ -3668,7 +3668,7 @@ static int allocatePage(
*/ */
MemPage *pNewTrunk; MemPage *pNewTrunk;
Pgno iNewTrunk = get4byte(&pTrunk->aData[8]); Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
rc = getPage(pBt, iNewTrunk, &pNewTrunk); rc = getPage(pBt, iNewTrunk, &pNewTrunk, 0);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
goto end_allocate_page; goto end_allocate_page;
} }
@ -3734,7 +3734,7 @@ static int allocatePage(
memcpy(&aData[8+closest*4], &aData[4+k*4], 4); memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
} }
put4byte(&aData[4], k-1); put4byte(&aData[4], k-1);
rc = getPage(pBt, *pPgno, ppPage); rc = getPage(pBt, *pPgno, ppPage, 1);
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
sqlite3pager_dont_rollback((*ppPage)->aData); sqlite3pager_dont_rollback((*ppPage)->aData);
rc = sqlite3pager_write((*ppPage)->aData); rc = sqlite3pager_write((*ppPage)->aData);
@ -3766,7 +3766,7 @@ static int allocatePage(
#endif #endif
assert( *pPgno!=PENDING_BYTE_PAGE(pBt) ); assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
rc = getPage(pBt, *pPgno, ppPage); rc = getPage(pBt, *pPgno, ppPage, 0);
if( rc ) return rc; if( rc ) return rc;
rc = sqlite3pager_write((*ppPage)->aData); rc = sqlite3pager_write((*ppPage)->aData);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
@ -3835,7 +3835,7 @@ static int freePage(MemPage *pPage){
/* Other free pages already exist. Retrieve the first trunk page /* Other free pages already exist. Retrieve the first trunk page
** of the freelist and find out how many leaves it has. */ ** of the freelist and find out how many leaves it has. */
MemPage *pTrunk; MemPage *pTrunk;
rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk); rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
if( rc ) return rc; if( rc ) return rc;
k = get4byte(&pTrunk->aData[4]); k = get4byte(&pTrunk->aData[4]);
if( k>=pBt->usableSize/4 - 8 ){ if( k>=pBt->usableSize/4 - 8 ){
@ -3883,7 +3883,7 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){
if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){ if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){
return SQLITE_CORRUPT_BKPT; return SQLITE_CORRUPT_BKPT;
} }
rc = getPage(pBt, ovflPgno, &pOvfl); rc = getPage(pBt, ovflPgno, &pOvfl, 0);
if( rc ) return rc; if( rc ) return rc;
ovflPgno = get4byte(pOvfl->aData); ovflPgno = get4byte(pOvfl->aData);
rc = freePage(pOvfl); rc = freePage(pOvfl);
@ -3975,7 +3975,6 @@ static int fillInCell(
#endif #endif
if( rc ){ if( rc ){
releasePage(pToRelease); releasePage(pToRelease);
/* clearCell(pPage, pCell); */
return rc; return rc;
} }
put4byte(pPrior, pgnoOvfl); put4byte(pPrior, pgnoOvfl);
@ -4978,7 +4977,7 @@ static int balance_shallower(MemPage *pPage){
pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]); pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
assert( pgnoChild>0 ); assert( pgnoChild>0 );
assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) ); assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
rc = getPage(pPage->pBt, pgnoChild, &pChild); rc = getPage(pPage->pBt, pgnoChild, &pChild, 0);
if( rc ) goto end_shallow_balance; if( rc ) goto end_shallow_balance;
if( pPage->pgno==1 ){ if( pPage->pgno==1 ){
rc = initPage(pChild, pPage); rc = initPage(pChild, pPage);
@ -5433,7 +5432,7 @@ int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
Pgno iPtrPage; Pgno iPtrPage;
releasePage(pPageMove); releasePage(pPageMove);
rc = getPage(pBt, pgnoRoot, &pRoot); rc = getPage(pBt, pgnoRoot, &pRoot, 0);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
@ -5454,7 +5453,7 @@ int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
rc = getPage(pBt, pgnoRoot, &pRoot); rc = getPage(pBt, pgnoRoot, &pRoot, 0);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
@ -5512,8 +5511,6 @@ static int clearDatabasePage(
rc = getAndInitPage(pBt, pgno, &pPage, pParent); rc = getAndInitPage(pBt, pgno, &pPage, pParent);
if( rc ) goto cleardatabasepage_out; if( rc ) goto cleardatabasepage_out;
rc = sqlite3pager_write(pPage->aData);
if( rc ) goto cleardatabasepage_out;
for(i=0; i<pPage->nCell; i++){ for(i=0; i<pPage->nCell; i++){
pCell = findCell(pPage, i); pCell = findCell(pPage, i);
if( !pPage->leaf ){ if( !pPage->leaf ){
@ -5529,7 +5526,7 @@ static int clearDatabasePage(
} }
if( freePageFlag ){ if( freePageFlag ){
rc = freePage(pPage); rc = freePage(pPage);
}else{ }else if( (rc = sqlite3pager_write(pPage->aData))==0 ){
zeroPage(pPage, pPage->aData[0] | PTF_LEAF); zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
} }
@ -5605,7 +5602,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
return SQLITE_LOCKED; return SQLITE_LOCKED;
} }
rc = getPage(pBt, (Pgno)iTable, &pPage); rc = getPage(pBt, (Pgno)iTable, &pPage, 0);
if( rc ) return rc; if( rc ) return rc;
rc = sqlite3BtreeClearTable(p, iTable); rc = sqlite3BtreeClearTable(p, iTable);
if( rc ){ if( rc ){
@ -5644,7 +5641,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
*/ */
MemPage *pMove; MemPage *pMove;
releasePage(pPage); releasePage(pPage);
rc = getPage(pBt, maxRootPgno, &pMove); rc = getPage(pBt, maxRootPgno, &pMove, 0);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
@ -5653,7 +5650,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
rc = getPage(pBt, maxRootPgno, &pMove); rc = getPage(pBt, maxRootPgno, &pMove, 0);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
@ -5787,7 +5784,7 @@ static int btreePageDump(BtShared *pBt, int pgno, int recursive, MemPage *pParen
char range[20]; char range[20];
unsigned char payload[20]; unsigned char payload[20];
rc = getPage(pBt, (Pgno)pgno, &pPage); rc = getPage(pBt, (Pgno)pgno, &pPage, 0);
isInit = pPage->isInit; isInit = pPage->isInit;
if( pPage->isInit==0 ){ if( pPage->isInit==0 ){
initPage(pPage, pParent); initPage(pPage, pParent);
@ -6168,7 +6165,7 @@ static int checkTreePage(
usableSize = pBt->usableSize; usableSize = pBt->usableSize;
if( iPage==0 ) return 0; if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage, zParentContext) ) return 0; if( checkRef(pCheck, iPage, zParentContext) ) return 0;
if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){ if( (rc = getPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
checkAppendMsg(pCheck, zContext, checkAppendMsg(pCheck, zContext,
"unable to get the page. error code=%d", rc); "unable to get the page. error code=%d", rc);
return 0; return 0;

View File

@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while ** file simultaneously, or one process from reading the database while
** another is writing. ** another is writing.
** **
** @(#) $Id: pager.c,v 1.284 2007/03/01 00:29:14 drh Exp $ ** @(#) $Id: pager.c,v 1.285 2007/03/04 13:15:28 drh Exp $
*/ */
#ifndef SQLITE_OMIT_DISKIO #ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h" #include "sqliteInt.h"
@ -2625,8 +2625,13 @@ int sqlite3pager_release_memory(int nReq){
** has to go to disk, and could also playback an old journal if necessary. ** has to go to disk, and could also playback an old journal if necessary.
** Since _lookup() never goes to disk, it never has to deal with locks ** Since _lookup() never goes to disk, it never has to deal with locks
** or journal files. ** or journal files.
**
** If clrFlag is false, the page contents are actually read from disk.
** If clrFlag is true, it means the page is about to be erased and
** rewritten without first being read so there is no point in doing
** the disk I/O.
*/ */
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){ int sqlite3pager_acquire(Pager *pPager, Pgno pgno, void **ppPage, int clrFlag){
PgHdr *pPg; PgHdr *pPg;
int rc; int rc;
@ -2784,7 +2789,9 @@ int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
/* Populate the page with data, either by reading from the database /* Populate the page with data, either by reading from the database
** file, or by setting the entire page to zero. ** file, or by setting the entire page to zero.
*/ */
if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB ){ if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB
|| (clrFlag && !pPager->alwaysRollback)
){
memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize); memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
}else{ }else{
assert( MEMDB==0 ); assert( MEMDB==0 );

View File

@ -13,7 +13,7 @@
** subsystem. The page cache subsystem reads and writes a file a page ** subsystem. The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback. ** at a time and provides a journal for rollback.
** **
** @(#) $Id: pager.h,v 1.52 2006/11/06 21:20:26 drh Exp $ ** @(#) $Id: pager.h,v 1.53 2007/03/04 13:15:28 drh Exp $
*/ */
#ifndef _PAGER_H_ #ifndef _PAGER_H_
@ -78,7 +78,8 @@ int sqlite3pager_set_pagesize(Pager*, int);
int sqlite3pager_read_fileheader(Pager*, int, unsigned char*); int sqlite3pager_read_fileheader(Pager*, int, unsigned char*);
void sqlite3pager_set_cachesize(Pager*, int); void sqlite3pager_set_cachesize(Pager*, int);
int sqlite3pager_close(Pager *pPager); int sqlite3pager_close(Pager *pPager);
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage); int sqlite3pager_acquire(Pager *pPager, Pgno pgno, void **ppPage, int clrFlag);
#define sqlite3pager_get(A,B,C) sqlite3pager_acquire(A,B,C,0)
void *sqlite3pager_lookup(Pager *pPager, Pgno pgno); void *sqlite3pager_lookup(Pager *pPager, Pgno pgno);
int sqlite3pager_ref(void*); int sqlite3pager_ref(void*);
int sqlite3pager_unref(void*); int sqlite3pager_unref(void*);