Changes to the btree and pager that reduce the amount of I/O when dealing
with the freelist.  (1) Avoid journaling pages of a table that is being
deleted.  (2) Do not read the original content of pages being pulled off
of the freelist. (CVS 3671)

FossilOrigin-Name: 2ba5be311945a4c15b6dce7c01efefb513b9a973
This commit is contained in:
drh 2007-03-04 13:15:27 +00:00
parent 8efe541f24
commit 0787db6519
5 changed files with 46 additions and 41 deletions

View File

@ -1,5 +1,5 @@
C Handle\sthe\scase\swhere\sthe\sestimated\scost\sof\sa\svirtual\stable\sscan\sis\slarger\sthan\sSQLITE_BIG_DBL.\sTicket\s#2253.\s(CVS\s3670)
D 2007-03-02T08:12:22
C Changes\sto\sthe\sbtree\sand\spager\sthat\sreduce\sthe\samount\sof\sI/O\swhen\sdealing\nwith\sthe\sfreelist.\s\s(1)\sAvoid\sjournaling\spages\sof\sa\stable\sthat\sis\sbeing\ndeleted.\s\s(2)\sDo\snot\sread\sthe\soriginal\scontent\sof\spages\sbeing\spulled\soff\nof\sthe\sfreelist.\s(CVS\s3671)
D 2007-03-04T13:15:28
F Makefile.in 1fe3d0b46e40fd684e1e61f8e8056cefed16de9f
F Makefile.linux-gcc 2d8574d1ba75f129aba2019f0b959db380a90935
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -57,7 +57,7 @@ F src/alter.c 2c79ec40f65e33deaf90ca493422c74586e481a3
F src/analyze.c 7d2b7ab9a9c2fd6e55700f69064dfdd3e36d7a8a
F src/attach.c b11eb4d5d3fb99a10a626956bccc7215f6b68b16
F src/auth.c 902f4722661c796b97f007d9606bd7529c02597f
F src/btree.c 4d4bef16fbf4f53ec3b161cfe5bb19bbc27a281d
F src/btree.c 866536a3c667de942747ee3fd3914b31bb273fee
F src/btree.h 066444ee25bd6e6accb997bfd2cf5ace14dbcd00
F src/build.c 6bd68dc730b01c1727738f8e4b5c730eb0ddb421
F src/callback.c 31d22b4919c7645cbcbb1591ce2453e8c677c558
@ -85,8 +85,8 @@ F src/os_unix.c abdb0f7b8e3f078b8b48d4c0b8c801693046774d
F src/os_unix.h 5768d56d28240d3fe4537fac08cc85e4fb52279e
F src/os_win.c 8736cf3a49fd651a6538857480f302807d57814c
F src/os_win.h 41a946bea10f61c158ce8645e7646b29d44f122b
F src/pager.c 5d3a127c93489c93e59dfbb1be2b29e39e135f68
F src/pager.h 2e6d42f4ae004ae748a037b8468112b851c447a7
F src/pager.c c78d1cc1a02d9c6ab3263c3ca757e4466973fa2a
F src/pager.h 8881591ca23d1e5fd83c95fa8317245fbcf64227
F src/parse.y bcfe366c1fd61cfc40e5344eb69a31997a821af0
F src/pragma.c 5091300911670ddaa552bfa12c45cbca1bb7e7d6
F src/prepare.c 484389c6811415b8f23d259ac9c029613e1c72c3
@ -435,7 +435,7 @@ F www/tclsqlite.tcl bb0d1357328a42b1993d78573e587c6dcbc964b9
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl 97e2b5cd296f7d8057e11f44427dea8a4c2db513
P ddb4d0af5770c7030fe6e92119972c9508724b9a
R 35703e5b3fc1d8c9b1249f60db5a29c1
U danielk1977
Z cc77ee9ae5f32f8281d9b31d7adeb8f2
P 52885ed8b76a06588acf202a38b4feabfca1cfd1
R 35bc70c2ff87b67600b039941003bc67
U drh
Z a8523f47620c390e8f111fe930472ba8

View File

@ -1 +1 @@
52885ed8b76a06588acf202a38b4feabfca1cfd1
2ba5be311945a4c15b6dce7c01efefb513b9a973

View File

@ -9,7 +9,7 @@
** May you share freely, never taking more than you give.
**
*************************************************************************
** $Id: btree.c,v 1.335 2007/02/10 19:22:36 drh Exp $
** $Id: btree.c,v 1.336 2007/03/04 13:15:28 drh Exp $
**
** This file implements an external (disk-based) database using BTrees.
** For a detailed discussion of BTrees, refer to
@ -1382,11 +1382,11 @@ static void zeroPage(MemPage *pPage, int flags){
** Get a page from the pager. Initialize the MemPage.pBt and
** MemPage.aData elements if needed.
*/
static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage){
static int getPage(BtShared *pBt, Pgno pgno, MemPage **ppPage, int clrFlag){
int rc;
unsigned char *aData;
MemPage *pPage;
rc = sqlite3pager_get(pBt->pPager, pgno, (void**)&aData);
rc = sqlite3pager_acquire(pBt->pPager, pgno, (void**)&aData, clrFlag);
if( rc ) return rc;
pPage = (MemPage*)&aData[pBt->pageSize];
pPage->aData = aData;
@ -1412,7 +1412,7 @@ static int getAndInitPage(
if( pgno==0 ){
return SQLITE_CORRUPT_BKPT;
}
rc = getPage(pBt, pgno, ppPage);
rc = getPage(pBt, pgno, ppPage, 0);
if( rc==SQLITE_OK && (*ppPage)->isInit==0 ){
rc = initPage(*ppPage, pParent);
}
@ -1834,7 +1834,7 @@ static int lockBtree(BtShared *pBt){
int rc, pageSize;
MemPage *pPage1;
if( pBt->pPage1 ) return SQLITE_OK;
rc = getPage(pBt, 1, &pPage1);
rc = getPage(pBt, 1, &pPage1, 0);
if( rc!=SQLITE_OK ) return rc;
@ -2237,7 +2237,7 @@ static int relocatePage(
** iPtrPage.
*/
if( eType!=PTRMAP_ROOTPAGE ){
rc = getPage(pBt, iPtrPage, &pPtrPage);
rc = getPage(pBt, iPtrPage, &pPtrPage, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@ -2341,7 +2341,7 @@ static int autoVacuumCommit(BtShared *pBt, Pgno *nTrunc){
if( eType==PTRMAP_FREEPAGE ){
continue;
}
rc = getPage(pBt, iDbPage, &pDbMemPage);
rc = getPage(pBt, iDbPage, &pDbMemPage, 0);
if( rc!=SQLITE_OK ) goto autovacuum_out;
/* Find the next page in the free-list that is not already at the end
@ -2525,7 +2525,7 @@ int sqlite3BtreeRollback(Btree *p){
/* The rollback may have destroyed the pPage1->aData value. So
** call getPage() on page 1 again to make sure pPage1->aData is
** set correctly. */
if( getPage(pBt, 1, &pPage1)==SQLITE_OK ){
if( getPage(pBt, 1, &pPage1, 0)==SQLITE_OK ){
releasePage(pPage1);
}
assert( countWriteCursors(pBt)==0 );
@ -3618,7 +3618,7 @@ static int allocatePage(
}else{
iTrunk = get4byte(&pPage1->aData[32]);
}
rc = getPage(pBt, iTrunk, &pTrunk);
rc = getPage(pBt, iTrunk, &pTrunk, 0);
if( rc ){
pTrunk = 0;
goto end_allocate_page;
@ -3668,7 +3668,7 @@ static int allocatePage(
*/
MemPage *pNewTrunk;
Pgno iNewTrunk = get4byte(&pTrunk->aData[8]);
rc = getPage(pBt, iNewTrunk, &pNewTrunk);
rc = getPage(pBt, iNewTrunk, &pNewTrunk, 0);
if( rc!=SQLITE_OK ){
goto end_allocate_page;
}
@ -3734,7 +3734,7 @@ static int allocatePage(
memcpy(&aData[8+closest*4], &aData[4+k*4], 4);
}
put4byte(&aData[4], k-1);
rc = getPage(pBt, *pPgno, ppPage);
rc = getPage(pBt, *pPgno, ppPage, 1);
if( rc==SQLITE_OK ){
sqlite3pager_dont_rollback((*ppPage)->aData);
rc = sqlite3pager_write((*ppPage)->aData);
@ -3766,7 +3766,7 @@ static int allocatePage(
#endif
assert( *pPgno!=PENDING_BYTE_PAGE(pBt) );
rc = getPage(pBt, *pPgno, ppPage);
rc = getPage(pBt, *pPgno, ppPage, 0);
if( rc ) return rc;
rc = sqlite3pager_write((*ppPage)->aData);
if( rc!=SQLITE_OK ){
@ -3835,7 +3835,7 @@ static int freePage(MemPage *pPage){
/* Other free pages already exist. Retrieve the first trunk page
** of the freelist and find out how many leaves it has. */
MemPage *pTrunk;
rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk);
rc = getPage(pBt, get4byte(&pPage1->aData[32]), &pTrunk, 0);
if( rc ) return rc;
k = get4byte(&pTrunk->aData[4]);
if( k>=pBt->usableSize/4 - 8 ){
@ -3883,7 +3883,7 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){
if( ovflPgno>sqlite3pager_pagecount(pBt->pPager) ){
return SQLITE_CORRUPT_BKPT;
}
rc = getPage(pBt, ovflPgno, &pOvfl);
rc = getPage(pBt, ovflPgno, &pOvfl, 0);
if( rc ) return rc;
ovflPgno = get4byte(pOvfl->aData);
rc = freePage(pOvfl);
@ -3975,7 +3975,6 @@ static int fillInCell(
#endif
if( rc ){
releasePage(pToRelease);
/* clearCell(pPage, pCell); */
return rc;
}
put4byte(pPrior, pgnoOvfl);
@ -4978,7 +4977,7 @@ static int balance_shallower(MemPage *pPage){
pgnoChild = get4byte(&pPage->aData[pPage->hdrOffset+8]);
assert( pgnoChild>0 );
assert( pgnoChild<=sqlite3pager_pagecount(pPage->pBt->pPager) );
rc = getPage(pPage->pBt, pgnoChild, &pChild);
rc = getPage(pPage->pBt, pgnoChild, &pChild, 0);
if( rc ) goto end_shallow_balance;
if( pPage->pgno==1 ){
rc = initPage(pChild, pPage);
@ -5433,7 +5432,7 @@ int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
Pgno iPtrPage;
releasePage(pPageMove);
rc = getPage(pBt, pgnoRoot, &pRoot);
rc = getPage(pBt, pgnoRoot, &pRoot, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@ -5454,7 +5453,7 @@ int sqlite3BtreeCreateTable(Btree *p, int *piTable, int flags){
if( rc!=SQLITE_OK ){
return rc;
}
rc = getPage(pBt, pgnoRoot, &pRoot);
rc = getPage(pBt, pgnoRoot, &pRoot, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@ -5512,8 +5511,6 @@ static int clearDatabasePage(
rc = getAndInitPage(pBt, pgno, &pPage, pParent);
if( rc ) goto cleardatabasepage_out;
rc = sqlite3pager_write(pPage->aData);
if( rc ) goto cleardatabasepage_out;
for(i=0; i<pPage->nCell; i++){
pCell = findCell(pPage, i);
if( !pPage->leaf ){
@ -5529,7 +5526,7 @@ static int clearDatabasePage(
}
if( freePageFlag ){
rc = freePage(pPage);
}else{
}else if( (rc = sqlite3pager_write(pPage->aData))==0 ){
zeroPage(pPage, pPage->aData[0] | PTF_LEAF);
}
@ -5605,7 +5602,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
return SQLITE_LOCKED;
}
rc = getPage(pBt, (Pgno)iTable, &pPage);
rc = getPage(pBt, (Pgno)iTable, &pPage, 0);
if( rc ) return rc;
rc = sqlite3BtreeClearTable(p, iTable);
if( rc ){
@ -5644,7 +5641,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
*/
MemPage *pMove;
releasePage(pPage);
rc = getPage(pBt, maxRootPgno, &pMove);
rc = getPage(pBt, maxRootPgno, &pMove, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@ -5653,7 +5650,7 @@ int sqlite3BtreeDropTable(Btree *p, int iTable, int *piMoved){
if( rc!=SQLITE_OK ){
return rc;
}
rc = getPage(pBt, maxRootPgno, &pMove);
rc = getPage(pBt, maxRootPgno, &pMove, 0);
if( rc!=SQLITE_OK ){
return rc;
}
@ -5787,7 +5784,7 @@ static int btreePageDump(BtShared *pBt, int pgno, int recursive, MemPage *pParen
char range[20];
unsigned char payload[20];
rc = getPage(pBt, (Pgno)pgno, &pPage);
rc = getPage(pBt, (Pgno)pgno, &pPage, 0);
isInit = pPage->isInit;
if( pPage->isInit==0 ){
initPage(pPage, pParent);
@ -6168,7 +6165,7 @@ static int checkTreePage(
usableSize = pBt->usableSize;
if( iPage==0 ) return 0;
if( checkRef(pCheck, iPage, zParentContext) ) return 0;
if( (rc = getPage(pBt, (Pgno)iPage, &pPage))!=0 ){
if( (rc = getPage(pBt, (Pgno)iPage, &pPage, 0))!=0 ){
checkAppendMsg(pCheck, zContext,
"unable to get the page. error code=%d", rc);
return 0;

View File

@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.284 2007/03/01 00:29:14 drh Exp $
** @(#) $Id: pager.c,v 1.285 2007/03/04 13:15:28 drh Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
@ -2625,8 +2625,13 @@ int sqlite3pager_release_memory(int nReq){
** has to go to disk, and could also playback an old journal if necessary.
** Since _lookup() never goes to disk, it never has to deal with locks
** or journal files.
**
** If clrFlag is false, the page contents are actually read from disk.
** If clrFlag is true, it means the page is about to be erased and
** rewritten without first being read so there is no point in doing
** the disk I/O.
*/
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
int sqlite3pager_acquire(Pager *pPager, Pgno pgno, void **ppPage, int clrFlag){
PgHdr *pPg;
int rc;
@ -2784,7 +2789,9 @@ int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage){
/* Populate the page with data, either by reading from the database
** file, or by setting the entire page to zero.
*/
if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB ){
if( sqlite3pager_pagecount(pPager)<(int)pgno || MEMDB
|| (clrFlag && !pPager->alwaysRollback)
){
memset(PGHDR_TO_DATA(pPg), 0, pPager->pageSize);
}else{
assert( MEMDB==0 );

View File

@ -13,7 +13,7 @@
** subsystem. The page cache subsystem reads and writes a file a page
** at a time and provides a journal for rollback.
**
** @(#) $Id: pager.h,v 1.52 2006/11/06 21:20:26 drh Exp $
** @(#) $Id: pager.h,v 1.53 2007/03/04 13:15:28 drh Exp $
*/
#ifndef _PAGER_H_
@ -78,7 +78,8 @@ int sqlite3pager_set_pagesize(Pager*, int);
int sqlite3pager_read_fileheader(Pager*, int, unsigned char*);
void sqlite3pager_set_cachesize(Pager*, int);
int sqlite3pager_close(Pager *pPager);
int sqlite3pager_get(Pager *pPager, Pgno pgno, void **ppPage);
int sqlite3pager_acquire(Pager *pPager, Pgno pgno, void **ppPage, int clrFlag);
#define sqlite3pager_get(A,B,C) sqlite3pager_acquire(A,B,C,0)
void *sqlite3pager_lookup(Pager *pPager, Pgno pgno);
int sqlite3pager_ref(void*);
int sqlite3pager_unref(void*);