diff --git a/manifest b/manifest index ee0cee40d9..4e258ea0da 100644 --- a/manifest +++ b/manifest @@ -1,8 +1,8 @@ -----BEGIN PGP SIGNED MESSAGE----- Hash: SHA1 -C Untested\simplementation\sof\sthe\sshared-memory\sdead-man-switch. -D 2010-04-29T16:40:51 +C Refactor\swal.c\sto\suse\sthe\sVFS.\s\sThis\scheck-in\scompiles\sand\slinks\sand\sworks\nok\sas\slong\sas\syou\sleave\sWAL\sturned\soff,\sbut\sWAL\sdoes\snot\swork. +D 2010-04-29T22:34:08 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in d83a0ffef3dcbfb08b410a6c6dd6c009ec9167fb F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -112,7 +112,7 @@ F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34 F src/backup.c e86634da8c48357a759694c9c7c471125cd8d5a8 F src/bitvec.c 06ad2c36a9c3819c0b9cbffec7b15f58d5d834e0 F src/btmutex.c 96a12f50f7a17475155971a241d85ec5171573ff -F src/btree.c dd26f3b5725ac3cbc765857a76523ac10dbfe761 +F src/btree.c 2e41e62e80ad4686b3322721982ceb7aa6d8c9f1 F src/btree.h dd83041eda10c17daf023257c1fc883b5f71f85a F src/btreeInt.h b0c87f6725b06a0aa194a6d25d54b16ce9d6e291 F src/build.c 11100b66fb97638d2d874c1d34d8db90650bb1d7 @@ -134,7 +134,7 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581 -F src/main.c 569561a238a0bb6d55affbaa224ddc885c91df2b +F src/main.c 28171059466a5dac18160c466b978eaafdbb3594 F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c 89d4ea8d5cdd55635cbaa48ad53132af6294cbb2 @@ -153,10 +153,10 @@ F src/os.c 8bc63cf91e9802e2b807198e54e50227fa889306 F src/os.h 534b082c3cb349ad05fa6fa0b06087e022af282c F src/os_common.h 0d6ee583b6ee3185eb9d951f890c6dd03021a08d F src/os_os2.c 8ad77a418630d7dee91d1bb04f79c2096301d3a0 -F src/os_unix.c f0e002e74108c8a3f70c63dd91419ab6bcafd095 +F src/os_unix.c 
bf4cc543fcb32dbcb25a88cf817c761ceef14fdd F src/os_win.c a8fc01d8483be472e495793c01064fd87e56a5c1 -F src/pager.c b4a41030860229e80295fa1f37addab24d21799c -F src/pager.h cee4487ab4f0911dd9f22a40e3cd55afdb7ef444 +F src/pager.c 07c3522319a449db9dc732da828a09c4c55c41a4 +F src/pager.h 934b598583a9d936bb13c37d62a2fe68ac48781c F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf F src/pcache.h c683390d50f856d4cd8e24342ae62027d1bb6050 @@ -169,14 +169,14 @@ F src/resolve.c ac5f1a713cd1ae77f08b83cc69581e11bf5ae6f9 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 F src/select.c c03d8a0565febcde8c6a12c5d77d065fddae889b F src/shell.c c40427c7245535a04a9cb4a417b6cc05c022e6a4 -F src/sqlite.h.in 0cc43a0fa5d26cd26d2609b84ecf094a159e9cb9 +F src/sqlite.h.in a710846e91159742ee3ddcbabb56eb7c7478e3a5 F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89 -F src/sqliteInt.h 700a2df7b8dfe57c3b8d83c52ff40928e026220c +F src/sqliteInt.h 32dfe6294b46914fb567db493994bf902fb0f8c9 F src/sqliteLimit.h 3afab2291762b5d09ae20c18feb8e9fa935a60a6 F src/status.c 4df6fe7dce2d256130b905847c6c60055882bdbe F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e F src/tclsqlite.c 4de81521174fedacd8393ea7b70b730ce17f8eae -F src/test1.c 64b5b8135080b94370e8100e5066bb394f5c3122 +F src/test1.c a449144907face6414f68f6a5663a5b8cadabbd4 F src/test2.c b6b43413d495addd039a88b87d65c839f86b18cb F src/test3.c 4c21700c73a890a47fc685c1097bfb661346ac94 F src/test4.c ad03bb987ddedce928f4258c1e7fa4109a73497d @@ -215,17 +215,17 @@ F src/update.c c0dc6b75ad28b76b619042d934f337b02acee208 F src/utf.c 1baeeac91707a4df97ccc6141ec0f808278af685 F src/util.c 32aebf04c10e51ad3977a928b7416bed671b620b F src/vacuum.c 8e7d9a6fd076774bb6e211cad335c7337948a164 -F src/vdbe.c da7ee09fdb5ee6ecfac11b74086834ff6583cd23 +F src/vdbe.c 21a96eeeb43dedd40072028db0b42e137296a81a F src/vdbe.h 471f6a3dcec4817ca33596fe7f6654d56c0e75f3 F src/vdbeInt.h 
19ebc8c2a2e938340051ee65af3f377fb99102d1 -F src/vdbeapi.c 466044df5bc916f778833e927165fd02cdef6086 +F src/vdbeapi.c 85e448c7d1d6dde7226c721cd8da8f9b5286699d F src/vdbeaux.c be6ca4bb330c7730b358dd627d6d632ca93482c9 F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1 F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2 F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda -F src/wal.c b1c6868b975a67f6f4dd2cd612eeace4117eb98f -F src/wal.h c60781e78e394af07ece3b64a11192eb442241c1 +F src/wal.c 1c984ba3bba07a9efb4974837d12daef8f0d80c4 +F src/wal.h d0a7048e68a86d3fe4639858d7f592606545d224 F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f F src/where.c faadd9c2bf08868e5135192b44e0d753e363a885 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -811,14 +811,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P 1bde41cf081570ad257f927b641e752dff4ed014 -R a7ee89d9d5eb7d4f4fd9d69824beab31 +P 706611283ea2575c2942543391026b36061cfc1c +R d5f4a8af6d2162b2c802208b97207bbd U drh -Z 15bd57757c5a0ce16c3065e9d45dae67 +Z eca35982e551d3fde58c618a2bb3d812 -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.6 (GNU/Linux) -iD4DBQFL2baVoxKgR168RlERArO2AJ9g8Vnb6XlVDltcg68idEyJzFPcYwCYh7Xs -0Mdctk0jP1xbHiTJhHLX+A== -=minX +iD8DBQFL2glkoxKgR168RlERAm4BAJ9/6BoUJM0bQ0jccd1czYE2rrEefgCfYnyN +XCr+jA+ZIb6AheCdZkTnnE4= +=Dx/c -----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index ca7b008f92..78f7afdb38 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -706611283ea2575c2942543391026b36061cfc1c \ No newline at end of file +62db5fa3b61be885b2d94e9b9ce3877b2c588350 \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index 62a8410764..e60754e582 100644 --- 
a/src/btree.c +++ b/src/btree.c @@ -2278,7 +2278,7 @@ static int lockBtree(BtShared *pBt){ */ if( page1[19]==2 && pBt->doNotUseWAL==0 ){ int isOpen = 0; - rc = sqlite3PagerOpenLog(pBt->pPager, &isOpen); + rc = sqlite3PagerOpenWal(pBt->pPager, &isOpen); if( rc!=SQLITE_OK ){ goto page1_init_failed; }else if( isOpen==0 ){ diff --git a/src/main.c b/src/main.c index 3e7c1257c2..c723a4ef55 100644 --- a/src/main.c +++ b/src/main.c @@ -1197,9 +1197,9 @@ void *sqlite3_wal_hook( ){ void *pRet; sqlite3_mutex_enter(db->mutex); - pRet = db->pLogArg; - db->xLogCallback = xCallback; - db->pLogArg = pArg; + pRet = db->pWalArg; + db->xWalCallback = xCallback; + db->pWalArg = pArg; sqlite3_mutex_leave(db->mutex); return pRet; } diff --git a/src/os_unix.c b/src/os_unix.c index 248309664d..0e525c0732 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -5049,6 +5049,7 @@ static int unixShmClose(sqlite3_shm *pSharedMem){ unixShm **pp; /* For looping over sibling connections */ int nRef; /* Number of connections to pFile */ + if( pSharedMem==0 ) return SQLITE_OK; p = (struct unixShm*)pSharedMem; pFile = p->pFile; @@ -5092,7 +5093,7 @@ static int unixShmSize( sqlite3_shm *pSharedMem, /* Pointer returned by unixShmOpen() */ int reqSize, /* Requested size. 
-1 for query only */ int *pNewSize, /* Write new size here */ - char **ppBuf /* Write new buffer origin here */ + void **ppBuf /* Write new buffer origin here */ ){ unixShm *p = (unixShm*)pSharedMem; unixShmFile *pFile = p->pFile; @@ -5165,7 +5166,7 @@ static int unixShmLock( || desiredLock==p->lockState || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL) ){ - *pGotLock = p->lockState; + if( pGotLock ) *pGotLock = p->lockState; return SQLITE_OK; } @@ -5257,7 +5258,7 @@ static int unixShmLock( } } sqlite3_mutex_leave(pFile->mutex); - *pGotLock = p->lockState; + if( pGotLock ) *pGotLock = p->lockState; return rc; } diff --git a/src/pager.c b/src/pager.c index bd704fa7c7..0ad245a8c4 100644 --- a/src/pager.c +++ b/src/pager.c @@ -399,7 +399,7 @@ struct Pager { char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */ PCache *pPCache; /* Pointer to page cache object */ sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */ - Log *pLog; /* Log used by "journal_mode=wal" */ + Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */ }; /* @@ -1192,8 +1192,8 @@ static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){ ** Return true if this pager uses a write-ahead log instead of the usual ** rollback journal. Otherwise false. */ -static int pagerUseLog(Pager *pPager){ - return (pPager->pLog!=0); +static int pagerUseWal(Pager *pPager){ + return (pPager->pWal!=0); } /* @@ -1227,8 +1227,8 @@ static void pager_unlock(Pager *pPager){ */ pPager->dbSizeValid = 0; - if( pagerUseLog(pPager) ){ - sqlite3WalCloseSnapshot(pPager->pLog); + if( pagerUseWal(pPager) ){ + sqlite3WalCloseSnapshot(pPager->pWal); }else{ rc = osUnlock(pPager->fd, NO_LOCK); } @@ -1380,7 +1380,7 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ assert( isOpen(pPager->jfd) || pPager->pInJournal==0 ); if( isOpen(pPager->jfd) ){ - assert( !pagerUseLog(pPager) ); + assert( !pagerUseWal(pPager) ); /* Finalize the journal file. 
*/ if( sqlite3IsMemJournal(pPager->jfd) ){ @@ -1426,8 +1426,8 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ pPager->nRec = 0; sqlite3PcacheCleanAll(pPager->pPCache); - if( pagerUseLog(pPager) ){ - rc2 = sqlite3WalWriteLock(pPager->pLog, 0); + if( pagerUseWal(pPager) ){ + rc2 = sqlite3WalWriteLock(pPager->pWal, 0); pPager->state = PAGER_SHARED; }else if( !pPager->exclusiveMode ){ rc2 = osUnlock(pPager->fd, SHARED_LOCK); @@ -1540,7 +1540,7 @@ static int pager_playback_one_page( aData = pPager->pTmpSpace; assert( aData ); /* Temp storage must have already been allocated */ - assert( pagerUseLog(pPager)==0 || (!isMainJrnl && isSavepnt) ); + assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) ); /* Read the page number and page data from the journal or sub-journal ** file. Return an error code to the caller if an IO error occurs. @@ -1610,7 +1610,7 @@ static int pager_playback_one_page( ** is possible to fail a statement on a database that does not yet exist. ** Do not attempt to write if database file has never been opened. */ - if( pagerUseLog(pPager) ){ + if( pagerUseWal(pPager) ){ pPg = 0; }else{ pPg = pager_lookup(pPager, pgno); @@ -1631,7 +1631,7 @@ static int pager_playback_one_page( ){ i64 ofst = (pgno-1)*(i64)pPager->pageSize; testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 ); - assert( !pagerUseLog(pPager) ); + assert( !pagerUseWal(pPager) ); rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst); if( pgno>pPager->dbFileSize ){ pPager->dbFileSize = pgno; @@ -1696,7 +1696,7 @@ static int pager_playback_one_page( ** segment is synced. If a crash occurs during or following this, ** database corruption may ensue. 
*/ - assert( !pagerUseLog(pPager) ); + assert( !pagerUseWal(pPager) ); sqlite3PcacheMakeClean(pPg); } #ifdef SQLITE_CHECK_PAGES @@ -2188,7 +2188,7 @@ static int readDbPage(PgHdr *pPg){ Pgno pgno = pPg->pgno; /* Page number to read */ int rc = SQLITE_OK; /* Return code */ i64 iOffset; /* Byte offset of file to read from */ - int isInLog = 0; /* True if page is in log file */ + int isInWal = 0; /* True if page is in log file */ assert( pPager->state>=PAGER_SHARED && !MEMDB ); assert( isOpen(pPager->fd) ); @@ -2199,11 +2199,11 @@ static int readDbPage(PgHdr *pPg){ return SQLITE_OK; } - if( pagerUseLog(pPager) ){ + if( pagerUseWal(pPager) ){ /* Try to pull the page from the write-ahead log. */ - rc = sqlite3WalRead(pPager->pLog, pgno, &isInLog, pPg->pData); + rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pPg->pData); } - if( rc==SQLITE_OK && !isInLog ){ + if( rc==SQLITE_OK && !isInWal ){ iOffset = (pgno-1)*(i64)pPager->pageSize; rc = sqlite3OsRead(pPager->fd, pPg->pData, pPager->pageSize, iOffset); if( rc==SQLITE_IOERR_SHORT_READ ){ @@ -2278,7 +2278,7 @@ static int pagerUndoCallback(void *pCtx, Pgno iPg){ /* ** This function is called to rollback a transaction on a WAL database. */ -static int pagerRollbackLog(Pager *pPager){ +static int pagerRollbackWal(Pager *pPager){ int rc; /* Return Code */ PgHdr *pList; /* List of dirty pages to revert */ @@ -2290,7 +2290,7 @@ static int pagerRollbackLog(Pager *pPager){ ** also copied into the backup databases) as part of this transaction, ** the backups must be restarted. */ - if( sqlite3WalDirty(pPager->pLog) ){ + if( sqlite3WalDirty(pPager->pWal) ){ sqlite3BackupRestart(pPager->pBackup); } @@ -2302,7 +2302,7 @@ static int pagerRollbackLog(Pager *pPager){ ** + Reload page content from the database (if refcount>0). 
*/ pPager->dbSize = pPager->dbOrigSize; - rc = sqlite3WalUndo(pPager->pLog, pagerUndoCallback, (void *)pPager); + rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager); pList = sqlite3PcacheDirtyList(pPager->pPCache); while( pList && rc==SQLITE_OK ){ PgHdr *pNext = pList->pDirty; @@ -2370,8 +2370,8 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ */ pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize; - if( !pSavepoint && pagerUseLog(pPager) ){ - return pagerRollbackLog(pPager); + if( !pSavepoint && pagerUseWal(pPager) ){ + return pagerRollbackWal(pPager); } /* Use pPager->journalOff as the effective size of the main rollback @@ -2380,7 +2380,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ ** past pPager->journalOff is off-limits to us. */ szJ = pPager->journalOff; - assert( pagerUseLog(pPager)==0 || szJ==0 ); + assert( pagerUseWal(pPager)==0 || szJ==0 ); /* Begin by rolling back records from the main journal starting at ** PagerSavepoint.iOffset and continuing to the next journal header. @@ -2389,7 +2389,7 @@ static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){ ** will be skipped automatically. Pages are added to pDone as they ** are played back. */ - if( pSavepoint && !pagerUseLog(pPager) ){ + if( pSavepoint && !pagerUseWal(pPager) ){ iHdrOff = pSavepoint->iHdrOffset ? 
pSavepoint->iHdrOffset : szJ; pPager->journalOff = pSavepoint->iOffset; while( rc==SQLITE_OK && pPager->journalOffiSubRec*(4+pPager->pageSize); - if( pagerUseLog(pPager) ){ - rc = sqlite3WalSavepointUndo(pPager->pLog, pSavepoint->iFrame); + if( pagerUseWal(pPager) ){ + rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->iFrame); } for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && iinSubRec; ii++){ assert( offset==ii*(4+pPager->pageSize) ); @@ -2750,8 +2750,8 @@ int sqlite3PagerPagecount(Pager *pPager, int *pnPage){ int rc; /* Error returned by OsFileSize() */ i64 n = 0; /* File size in bytes returned by OsFileSize() */ - if( pagerUseLog(pPager) ){ - sqlite3WalDbsize(pPager->pLog, &nPage); + if( pagerUseWal(pPager) ){ + sqlite3WalDbsize(pPager->pWal, &nPage); } if( nPage==0 ){ @@ -2936,10 +2936,10 @@ int sqlite3PagerClose(Pager *pPager){ sqlite3BeginBenignMalloc(); pPager->errCode = 0; pPager->exclusiveMode = 0; - sqlite3WalClose(pPager->pLog, pPager->fd, + sqlite3WalClose(pPager->pWal, pPager->fd, (pPager->noSync ? 0 : pPager->sync_flags), pTmp ); - pPager->pLog = 0; + pPager->pWal = 0; pager_reset(pPager); if( MEMDB ){ pager_unlock(pPager); @@ -3176,7 +3176,7 @@ static int pager_write_pagelist(PgHdr *pList){ ** EXCLUSIVE, it means the database file has been changed and any rollback ** will require a journal playback. */ - assert( !pagerUseLog(pList->pPager) ); + assert( !pagerUseWal(pList->pPager) ); assert( pPager->state>=PAGER_RESERVED ); rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK); @@ -3265,7 +3265,7 @@ static int subjournalPage(PgHdr *pPg){ CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2); PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno)); - assert( pagerUseLog(pPager) + assert( pagerUseWal(pPager) || pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize ); @@ -3288,7 +3288,7 @@ static int subjournalPage(PgHdr *pPg){ ** this function notifies any active backup processes that the pages have ** changed. 
*/ -static int pagerLogFrames( +static int pagerWalFrames( Pager *pPager, /* Pager object */ PgHdr *pList, /* List of frames to log */ Pgno nTruncate, /* Database size after this commit */ @@ -3297,8 +3297,8 @@ static int pagerLogFrames( ){ int rc; /* Return code */ - assert( pPager->pLog ); - rc = sqlite3WalFrames(pPager->pLog, + assert( pPager->pWal ); + rc = sqlite3WalFrames(pPager->pWal, pPager->pageSize, pList, nTruncate, isCommit, sync_flags ); if( rc==SQLITE_OK && pPager->pBackup ){ @@ -3337,13 +3337,13 @@ static int pagerStress(void *p, PgHdr *pPg){ assert( pPg->flags&PGHDR_DIRTY ); pPg->pDirty = 0; - if( pagerUseLog(pPager) ){ + if( pagerUseWal(pPager) ){ /* Write a single frame for this page to the log. */ if( subjRequiresPage(pPg) ){ rc = subjournalPage(pPg); } if( rc==SQLITE_OK ){ - rc = pagerLogFrames(pPager, pPg, 0, 0, 0); + rc = pagerWalFrames(pPager, pPg, 0, 0, 0); } }else{ /* The doNotSync flag is set by the sqlite3PagerWrite() function while it @@ -3836,12 +3836,12 @@ static int pagerHasWAL(Pager *pPager, int *pExists){ int rc; /* Return code */ if( !pPager->tempFile ){ - char *zLog = sqlite3_mprintf("%s-wal", pPager->zFilename); - if( !zLog ){ + char *zWal = sqlite3_mprintf("%s-wal", pPager->zFilename); + if( !zWal ){ rc = SQLITE_NOMEM; }else{ - rc = sqlite3OsAccess(pPager->pVfs, zLog, SQLITE_ACCESS_EXISTS, pExists); - sqlite3_free(zLog); + rc = sqlite3OsAccess(pPager->pVfs, zWal, SQLITE_ACCESS_EXISTS, pExists); + sqlite3_free(zWal); } }else{ rc = SQLITE_OK; @@ -3854,9 +3854,9 @@ static int pagerOpenSnapshot(Pager *pPager){ int rc; /* Return code */ int changed = 0; /* True if cache must be reset */ - assert( pagerUseLog(pPager) ); + assert( pagerUseWal(pPager) ); - rc = sqlite3WalOpenSnapshot(pPager->pLog, &changed); + rc = sqlite3WalOpenSnapshot(pPager->pWal, &changed); if( rc==SQLITE_OK ){ int dummy; if( changed ){ @@ -3923,7 +3923,7 @@ int sqlite3PagerSharedLock(Pager *pPager){ pager_reset(pPager); } - if( pagerUseLog(pPager) ){ + if( 
pagerUseWal(pPager) ){ rc = pagerOpenSnapshot(pPager); }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; @@ -4089,7 +4089,7 @@ int sqlite3PagerSharedLock(Pager *pPager){ } if( isWal ){ pager_reset(pPager); - rc = sqlite3PagerOpenLog(pPager, 0); + rc = sqlite3PagerOpenWal(pPager, 0); if( rc==SQLITE_OK ){ rc = pagerOpenSnapshot(pPager); } @@ -4332,7 +4332,7 @@ void sqlite3PagerUnref(DbPage *pPg){ */ static int openSubJournal(Pager *pPager){ int rc = SQLITE_OK; - if( (pagerUseLog(pPager) || isOpen(pPager->jfd)) && !isOpen(pPager->sjfd) ){ + if( (pagerUseWal(pPager) || isOpen(pPager->jfd)) && !isOpen(pPager->sjfd) ){ if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){ sqlite3MemJournalOpen(pPager->sjfd); }else{ @@ -4468,13 +4468,13 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ assert( pPager->pInJournal==0 ); assert( !MEMDB && !pPager->tempFile ); - if( pagerUseLog(pPager) ){ + if( pagerUseWal(pPager) ){ /* Grab the write lock on the log file. If successful, upgrade to ** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller. ** The busy-handler is not invoked if another connection already ** holds the write-lock. If possible, the upper layer will call it. */ - rc = sqlite3WalWriteLock(pPager->pLog, 1); + rc = sqlite3WalWriteLock(pPager->pWal, 1); if( rc==SQLITE_OK ){ pPager->dbOrigSize = pPager->dbSize; pPager->state = PAGER_RESERVED; @@ -4507,7 +4507,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ ** kept open and either was truncated to 0 bytes or its header was ** overwritten with zeros. 
*/ - assert( pagerUseLog(pPager)==0 ); + assert( pagerUseWal(pPager)==0 ); assert( pPager->nRec==0 ); assert( pPager->dbOrigSize==0 ); assert( pPager->pInJournal==0 ); @@ -4562,7 +4562,7 @@ static int pager_write(PgHdr *pPg){ */ sqlite3PcacheMakeDirty(pPg); if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){ - assert( !pagerUseLog(pPager) ); + assert( !pagerUseWal(pPager) ); pPager->dbModified = 1; }else{ @@ -4580,7 +4580,7 @@ static int pager_write(PgHdr *pPg){ } if( !isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_OFF - && !pagerUseLog(pPager) + && !pagerUseWal(pPager) ){ assert( pPager->useJournal ); rc = pager_open_journal(pPager); @@ -4593,7 +4593,7 @@ static int pager_write(PgHdr *pPg){ ** the transaction journal if it is not there already. */ if( !pageInJournal(pPg) && isOpen(pPager->jfd) ){ - assert( !pagerUseLog(pPager) ); + assert( !pagerUseWal(pPager) ); if( pPg->pgno<=pPager->dbOrigSize ){ u32 cksum; char *pData2; @@ -4974,10 +4974,10 @@ int sqlite3PagerCommitPhaseOne( */ sqlite3BackupRestart(pPager->pBackup); }else if( pPager->state!=PAGER_SYNCED && pPager->dbModified ){ - if( pagerUseLog(pPager) ){ + if( pagerUseWal(pPager) ){ PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache); if( pList ){ - rc = pagerLogFrames(pPager, pList, pPager->dbSize, 1, + rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, (pPager->fullSync ? 
pPager->sync_flags : 0) ); } @@ -5213,7 +5213,7 @@ int sqlite3PagerCommitPhaseTwo(Pager *pPager){ int sqlite3PagerRollback(Pager *pPager){ int rc = SQLITE_OK; /* Return code */ PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager))); - if( pagerUseLog(pPager) ){ + if( pagerUseWal(pPager) ){ int rc2; rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1); @@ -5360,8 +5360,8 @@ int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){ if( !aNew[ii].pInSavepoint ){ return SQLITE_NOMEM; } - if( pagerUseLog(pPager) ){ - aNew[ii].iFrame = sqlite3WalSavepoint(pPager->pLog); + if( pagerUseWal(pPager) ){ + aNew[ii].iFrame = sqlite3WalSavepoint(pPager->pWal); } } @@ -5440,7 +5440,7 @@ int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){ ** not yet been opened. In this case there have been no changes to ** the database file, so the playback operation can be skipped. */ - else if( pagerUseLog(pPager) || isOpen(pPager->jfd) ){ + else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){ PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1]; rc = pagerPlaybackSavepoint(pPager, pSavepoint); assert(rc!=SQLITE_DONE); @@ -5796,9 +5796,9 @@ sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){ */ int sqlite3PagerCheckpoint(Pager *pPager){ int rc = SQLITE_OK; - if( pPager->pLog ){ + if( pPager->pWal ){ u8 *zBuf = (u8 *)pPager->pTmpSpace; - rc = sqlite3WalCheckpoint(pPager->pLog, pPager->fd, + rc = sqlite3WalCheckpoint(pPager->pWal, pPager->fd, (pPager->noSync ? 0 : pPager->sync_flags), zBuf, pPager->xBusyHandler, pPager->pBusyHandlerArg ); @@ -5806,8 +5806,8 @@ int sqlite3PagerCheckpoint(Pager *pPager){ return rc; } -int sqlite3PagerLogCallback(Pager *pPager){ - return sqlite3WalCallback(pPager->pLog); +int sqlite3PagerWalCallback(Pager *pPager){ + return sqlite3WalCallback(pPager->pWal); } /* @@ -5817,17 +5817,17 @@ int sqlite3PagerLogCallback(Pager *pPager){ ** The caller must be holding a SHARED lock on the database file to call ** this function. 
*/ -int sqlite3PagerOpenLog(Pager *pPager, int *pisOpen){ +int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen){ int rc = SQLITE_OK; /* Return code */ assert( pPager->state>=PAGER_SHARED ); - if( !pPager->pLog ){ + if( !pPager->pWal ){ /* Open the connection to the log file. If this operation fails, ** (e.g. due to malloc() failure), unlock the database file and ** return an error code. */ - rc = sqlite3WalOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog); + rc = sqlite3WalOpen(pPager->pVfs, pPager->zFilename, &pPager->pWal); if( rc==SQLITE_OK ){ pPager->journalMode = PAGER_JOURNALMODE_WAL; } @@ -5848,7 +5848,7 @@ int sqlite3PagerOpenLog(Pager *pPager, int *pisOpen){ ** error (SQLITE_BUSY) is returned and the log connection is not closed. ** If successful, the EXCLUSIVE lock is not released before returning. */ -int sqlite3PagerCloseLog(Pager *pPager){ +int sqlite3PagerCloseWal(Pager *pPager){ int rc = SQLITE_OK; assert( pPager->journalMode==PAGER_JOURNALMODE_WAL ); @@ -5857,28 +5857,28 @@ int sqlite3PagerCloseLog(Pager *pPager){ ** it may need to be checkpointed before the connection can switch to ** rollback mode. Open it now so this can happen. */ - if( !pPager->pLog ){ + if( !pPager->pWal ){ int logexists = 0; rc = sqlite3OsLock(pPager->fd, SQLITE_LOCK_SHARED); if( rc==SQLITE_OK ){ rc = pagerHasWAL(pPager, &logexists); } if( rc==SQLITE_OK && logexists ){ - rc = sqlite3WalOpen(pPager->pVfs, pPager->zFilename, &pPager->pLog); + rc = sqlite3WalOpen(pPager->pVfs, pPager->zFilename, &pPager->pWal); } } /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on ** the database file, the log and log-summary files will be deleted. */ - if( rc==SQLITE_OK && pPager->pLog ){ + if( rc==SQLITE_OK && pPager->pWal ){ rc = sqlite3OsLock(pPager->fd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ - rc = sqlite3WalClose(pPager->pLog, pPager->fd, + rc = sqlite3WalClose(pPager->pWal, pPager->fd, (pPager->noSync ? 
0 : pPager->sync_flags), (u8*)pPager->pTmpSpace ); - pPager->pLog = 0; + pPager->pWal = 0; } } return rc; diff --git a/src/pager.h b/src/pager.h index 7b760c9b1a..91b2cb32f9 100644 --- a/src/pager.h +++ b/src/pager.h @@ -135,9 +135,9 @@ int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint); int sqlite3PagerSharedLock(Pager *pPager); int sqlite3PagerCheckpoint(Pager *pPager); -int sqlite3PagerLogCallback(Pager *pPager); -int sqlite3PagerOpenLog(Pager *pPager, int *pisOpen); -int sqlite3PagerCloseLog(Pager *pPager); +int sqlite3PagerWalCallback(Pager *pPager); +int sqlite3PagerOpenWal(Pager *pPager, int *pisOpen); +int sqlite3PagerCloseWal(Pager *pPager); /* Functions used to query pager state and configuration. */ u8 sqlite3PagerIsreadonly(Pager*); diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 70e7d26a8c..35ac5bd008 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -844,7 +844,7 @@ struct sqlite3_vfs { ** definition. Those that follow are added in version 2 or later */ int (*xShmOpen)(sqlite3_vfs*, const char *zName, sqlite3_shm**); - int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize, char**); + int (*xShmSize)(sqlite3_shm*, int reqSize, int *pNewSize, void**); int (*xShmRelease)(sqlite3_shm*); int (*xShmPush)(sqlite3_shm*); int (*xShmPull)(sqlite3_shm*); diff --git a/src/sqliteInt.h b/src/sqliteInt.h index 7fbe053403..54effce127 100644 --- a/src/sqliteInt.h +++ b/src/sqliteInt.h @@ -823,8 +823,8 @@ struct sqlite3 { void (*xRollbackCallback)(void*); /* Invoked at every commit. 
*/ void *pUpdateArg; void (*xUpdateCallback)(void*,int, const char*,const char*,sqlite_int64); - int (*xLogCallback)(void *, sqlite3 *, const char *, int); - void *pLogArg; + int (*xWalCallback)(void *, sqlite3 *, const char *, int); + void *pWalArg; void(*xCollNeeded)(void*,sqlite3*,int eTextRep,const char*); void(*xCollNeeded16)(void*,sqlite3*,int eTextRep,const void*); void *pCollNeededArg; diff --git a/src/test1.c b/src/test1.c index 6f20cf9a90..f3eff8c672 100644 --- a/src/test1.c +++ b/src/test1.c @@ -5113,7 +5113,7 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ #ifdef SQLITE_TEST extern char sqlite3_query_plan[]; static char *query_plan = sqlite3_query_plan; - extern int sqlite3_walsummary_mmap_incr; /* In wal.c */ +/* extern int sqlite3_walsummary_mmap_incr; // In wal.c */ #ifdef SQLITE_ENABLE_FTS3 extern int sqlite3_fts3_enable_parentheses; #endif @@ -5173,8 +5173,8 @@ int Sqlitetest1_Init(Tcl_Interp *interp){ #ifdef SQLITE_TEST Tcl_LinkVar(interp, "sqlite_query_plan", (char*)&query_plan, TCL_LINK_STRING|TCL_LINK_READ_ONLY); - Tcl_LinkVar(interp, "sqlite_walsummary_mmap_incr", - (char*)&sqlite3_walsummary_mmap_incr, TCL_LINK_INT); +/* Tcl_LinkVar(interp, "sqlite_walsummary_mmap_incr", + (char*)&sqlite3_walsummary_mmap_incr, TCL_LINK_INT); */ #endif #ifdef SQLITE_DEBUG Tcl_LinkVar(interp, "sqlite_addop_trace", diff --git a/src/vdbe.c b/src/vdbe.c index a8eddf857f..60e72fadd2 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -5252,11 +5252,11 @@ case OP_JournalMode: { if( eOld==PAGER_JOURNALMODE_WAL ){ /* If leaving WAL mode, close the log file. If successful, the call - ** to PagerCloseLog() checkpoints and deletes the write-ahead-log + ** to PagerCloseWal() checkpoints and deletes the write-ahead-log ** file. An EXCLUSIVE lock may still be held on the database file ** after a successful return. 
*/ - rc = sqlite3PagerCloseLog(pPager); + rc = sqlite3PagerCloseWal(pPager); if( rc!=SQLITE_OK ) goto abort_due_to_error; sqlite3PagerJournalMode(pPager, eNew); }else{ diff --git a/src/vdbeapi.c b/src/vdbeapi.c index 1c938ee5d2..12100ce56b 100644 --- a/src/vdbeapi.c +++ b/src/vdbeapi.c @@ -306,15 +306,15 @@ void sqlite3_result_error_nomem(sqlite3_context *pCtx){ pCtx->s.db->mallocFailed = 1; } -static int doLogCallbacks(sqlite3 *db){ +static int doWalCallbacks(sqlite3 *db){ int i; int rc = SQLITE_OK; for(i=0; inDb; i++){ Btree *pBt = db->aDb[i].pBt; if( pBt ){ - int nEntry = sqlite3PagerLogCallback(sqlite3BtreePager(pBt)); - if( db->xLogCallback && nEntry>0 && rc==SQLITE_OK - && db->xLogCallback(db->pLogArg, db, db->aDb[i].zName, nEntry) + int nEntry = sqlite3PagerWalCallback(sqlite3BtreePager(pBt)); + if( db->xWalCallback && nEntry>0 && rc==SQLITE_OK + && db->xWalCallback(db->pWalArg, db, db->aDb[i].zName, nEntry) ){ rc = sqlite3PagerCheckpoint(sqlite3BtreePager(pBt)); } @@ -406,7 +406,7 @@ static int sqlite3Step(Vdbe *p){ if( rc==SQLITE_DONE ){ assert( p->rc==SQLITE_OK ); - p->rc = doLogCallbacks(db); + p->rc = doWalCallbacks(db); if( p->rc!=SQLITE_OK ){ rc = SQLITE_ERROR; } diff --git a/src/wal.c b/src/wal.c index 8a8f4043d1..2de53b8b18 100644 --- a/src/wal.c +++ b/src/wal.c @@ -1,22 +1,33 @@ - /* -** This file contains the implementation of a log file used in +** 2010 February 1 +** +** The author disclaims copyright to this source code. In place of +** a legal notice, here is a blessing: +** +** May you do good and not evil. +** May you find forgiveness for yourself and forgive others. +** May you share freely, never taking more than you give. +** +************************************************************************* +** +** This file contains the implementation of a write-ahead log file used in ** "journal_mode=wal" mode. 
*/ +#include "wal.h" /* -** LOG FILE FORMAT +** WRITE-AHEAD LOG (WAL) FILE FORMAT ** -** A log file consists of a header followed by zero or more log frames. -** The log header is 12 bytes in size and consists of the following three +** A wal file consists of a header followed by zero or more "frames". +** The header is 12 bytes in size and consists of the following three ** big-endian 32-bit unsigned integer values: ** ** 0: Database page size, ** 4: Randomly selected salt value 1, ** 8: Randomly selected salt value 2. ** -** Immediately following the log header are zero or more log frames. Each +** Immediately following the header are zero or more frames. Each ** frame itself consists of a 16-byte header followed by page-size bytes ** of page data. The header is broken into 4 big-endian 32-bit unsigned ** integer values, as follows: @@ -29,56 +40,49 @@ */ /* -** LOG SUMMARY FILE FORMAT +** WAL-INDEX FILE FORMAT ** -** The log-summary file consists of a header region, followed by an -** region that contains no useful data (used to apply byte-range locks +** The wal-index file consists of a 32-byte header region, followed by an +** 8-byte region that contains no useful data (used to apply byte-range locks ** to), followed by the data region. ** ** The contents of both the header and data region are specified in terms ** of 1, 2 and 4 byte unsigned integers. All integers are stored in -** machine-endian order. +** machine-endian order. The wal-index is not a persistent file and +** so it does not need to be portable across architectures. ** -** A log-summary file is essentially a shadow-pager map. It contains a -** mapping from database page number to the set of locations in the log +** A wal-index file is essentially a shadow-pager map. It contains a +** mapping from database page number to the set of locations in the wal ** file that contain versions of the database page. 
When a database -** client needs to read a page of data, it first queries the log-summary +** client needs to read a page of data, it first queries the wal-index ** file to determine if the required version of the page is stored in -** the log. If so, it is read from the log file. If not, it is read from -** the database file. +** the wal. If so, the page is read from the wal. If not, the page is +** read from the database file. ** -** Whenever a transaction is appended to the log or a checkpoint transfers -** data from the log file into the database file, the log-summary is +** Whenever a transaction is appended to the wal or a checkpoint transfers +** data from the wal into the database file, the wal-index is ** updated accordingly. ** -** The fields in the log-summary file header are described in the comment -** directly above the definition of struct LogSummaryHdr (see below). -** Immediately following the fields in the LogSummaryHdr structure is +** The fields in the wal-index file header are described in the comment +** directly above the definition of struct WalIndexHdr (see below). +** Immediately following the fields in the WalIndexHdr structure is ** an 8 byte checksum based on the contents of the header. This field is -** not the same as the iCheck1 and iCheck2 fields of the LogSummaryHdr. +** not the same as the iCheck1 and iCheck2 fields of the WalIndexHdr. */ -#include "wal.h" - -#include -#include -#include - -typedef struct LogSummaryHdr LogSummaryHdr; -typedef struct LogSummary LogSummary; -typedef struct LogIterator LogIterator; -typedef struct LogLock LogLock; +/* Object declarations */ +typedef struct WalIndexHdr WalIndexHdr; +typedef struct WalIterator WalIterator; /* -** The following structure may be used to store the same data that -** is stored in the log-summary header. +** The following object stores a copy of the wal-index header. 
** ** Member variables iCheck1 and iCheck2 contain the checksum for the -** last frame written to the log, or 2 and 3 respectively if the log +** last frame written to the wal, or 2 and 3 respectively if the log ** is currently empty. */ -struct LogSummaryHdr { +struct WalIndexHdr { u32 iChange; /* Counter incremented each transaction */ u32 pgsz; /* Database page size in bytes */ u32 iLastPg; /* Address of last valid frame in log */ @@ -87,200 +91,46 @@ struct LogSummaryHdr { u32 iCheck2; /* Checkpoint value 2 */ }; -/* Size of serialized LogSummaryHdr object. */ -#define LOGSUMMARY_HDR_NFIELD (sizeof(LogSummaryHdr) / sizeof(u32)) +/* Size of serialized WalIndexHdr object. */ +#define WALINDEX_HDR_NFIELD (sizeof(WalIndexHdr) / sizeof(u32)) -/* A block of 16 bytes beginning at LOGSUMMARY_LOCK_OFFSET is reserved +/* A block of 16 bytes beginning at WALINDEX_LOCK_OFFSET is reserved ** for locks. Since some systems only feature mandatory file-locks, we ** do not read or write data from the region of the file on which locks ** are applied. */ -#define LOGSUMMARY_LOCK_OFFSET ((sizeof(LogSummaryHdr))+2*sizeof(u32)) -#define LOGSUMMARY_LOCK_RESERVED 16 +#define WALINDEX_LOCK_OFFSET ((sizeof(WalIndexHdr))+2*sizeof(u32)) +#define WALINDEX_LOCK_RESERVED 8 -/* Size of header before each frame in log file */ -#define LOG_FRAME_HDRSIZE 16 +/* Size of header before each frame in wal */ +#define WAL_FRAME_HDRSIZE 16 -/* Size of log header */ -#define LOG_HDRSIZE 12 +/* Size of write ahead log header */ +#define WAL_HDRSIZE 12 /* -** Return the offset of frame iFrame in the log file, assuming a database -** page size of pgsz bytes. The offset returned is to the start of the -** log frame-header. +** Return the offset of frame iFrame in the write-ahead log file, +** assuming a database page size of pgsz bytes. The offset returned +** is to the start of the write-ahead log frame-header. 
*/ -#define logFrameOffset(iFrame, pgsz) ( \ - LOG_HDRSIZE + ((iFrame)-1)*((pgsz)+LOG_FRAME_HDRSIZE) \ +#define walFrameOffset(iFrame, pgsz) ( \ + WAL_HDRSIZE + ((iFrame)-1)*((pgsz)+WAL_FRAME_HDRSIZE) \ ) /* -** If using mmap() to access a shared (or otherwise) log-summary file, then -** the mapping size is incremented in units of the following size. -** -** A 64 KB log-summary mapping corresponds to a log file containing over -** 13000 frames, so the mapping size does not need to be increased often. +** An open write-ahead log file is represented by an instance of the +** following object. */ -#ifdef SQLITE_TEST -int sqlite3_walsummary_mmap_incr = 128; -# define LOGSUMMARY_MMAP_INCREMENT sqlite3_walsummary_mmap_incr -#else -# define LOGSUMMARY_MMAP_INCREMENT (64*1024) -#endif - -/* -** There is one instance of this structure for each log-summary object -** that this process has a connection to. They are stored in a linked -** list starting at pLogSummary (global variable). -** -** TODO: LogSummary.fd is a unix file descriptor. Unix APIs are used -** directly in this implementation because the VFS does not support -** the required blocking file-locks. -*/ -struct LogSummary { - sqlite3_mutex *mutex; /* Mutex used to protect this object */ - int nRef; /* Number of pointers to this structure */ - int fd; /* File descriptor open on log-summary */ - char *zPath; /* Path to associated WAL file */ - LogLock *pLock; /* Linked list of locks on this object */ - LogSummary *pNext; /* Next in global list */ - - int nData; /* Size of aData allocation/mapping */ - u32 *aData; /* File body */ -}; - -/* -** This module uses three different types of file-locks. All are taken -** on the log-summary file. The three types of locks are as follows: -** -** MUTEX: The MUTEX lock is used as a robust inter-process mutex. It -** is held while the log-summary header is modified, and -** sometimes when it is read. 
It is also held while a new client -** obtains the DMH lock (see below), and while log recovery is -** being run. -** -** DMS: The DMS (Dead Mans Switch mechanism) lock is used to ensure -** that log-recovery is always run following a system restart. -** When it first opens a log-summary file, a process takes a -** SHARED lock on the DMH region. This lock is not released until -** the log-summary file is closed. -** -** The process then attempts to upgrade to an EXCLUSIVE lock. If -** successful, then the contents of the log-summary file are deemed -** suspect and the log-summary header zeroed. This forces the -** first process that reads the log-summary file to run log -** recovery. After zeroing the log-summary header, the process -** downgrades to a SHARED lock on the DMH region. -** -** If the attempt to obtain the EXCLUSIVE lock fails, then the -** process concludes that some other process is already using the -** log-summary file, and it can therefore be trusted. -** -** The procedure described in the previous three paragraphs (taking -** a SHARED lock and then upgrading to an EXCLUSIVE lock to check -** if the process is the only one to have an open connection to the -** log file) is protected by holding the MUTEX lock. This avoids the -** race condition wherein the first two clients connect almost -** simultaneously following a system restart and each prevents -** the other from obtaining the EXCLUSIVE lock. -** -** -** REGION: There are 4 different region locks, regions A, B, C and D. -** Various EXCLUSIVE and SHARED locks on these regions are obtained -** when a client reads, writes or checkpoints the database. -** -** To obtain a reader lock: -** -** 1. Attempt a SHARED lock on regions A and B. -** 2. If step 1 is successful, drop the lock on region B. Or, if -** it is unsuccessful, attempt a SHARED lock on region D. -** 3. Repeat the above until the lock attempt in step 1 or 2 is -** successful. 
-** -** The reader lock is released when the read transaction is finished. -** -** To obtain a writer lock: -** -** 1. Take (wait for) an EXCLUSIVE lock on regions C and D. -** -** The locks are released after the write transaction is finished -** and, if any frames were committed to the log, the log-summary -** file updated. -** -** To obtain a checkpointer lock: -** -** 1. Take (wait for) an EXCLUSIVE lock on regions B and C. -** 2. Take (wait for) an EXCLUSIVE lock on region A. -** -** Step 1 waits until any existing writer has finished. And forces -** all new readers to become "region D" readers. -** -** Step 2 causes the checkpointer to wait until all existing region A -** readers have finished their transactions. Once the exclusive lock -** on region A has been obtained, only "region D" readers exist. -** These readers are operating on the snapshot at the head of the -** log. As such, the log can be safely copied into the database file -** without interfering with the readers. -** -** Once the checkpoint has finished and the log-summary header -** updated (to indicate the log contents can now be ignored), all -** locks are released. -** -** However, there may still exist region D readers using data in -** the body of the log file, so the log file itself cannot be -** truncated or overwritten until all region D readers have finished. -** That requirement is satisfied, because writers (the clients that -** write to the log file) require an exclusive lock on region D. -** Which they cannot get until all region D readers have finished. -*/ -#define LOG_LOCK_MUTEX (LOGSUMMARY_LOCK_OFFSET) -#define LOG_LOCK_DMH (LOG_LOCK_MUTEX+1) -#define LOG_LOCK_REGION (LOG_LOCK_DMH+1) - -/* -** The four lockable regions associated with each log-summary. A connection -** may take either a SHARED or EXCLUSIVE lock on each. An ORed combination -** of the following bitmasks is passed as the second argument to the -** logLockRegion() function. 
-*/ -#define LOG_REGION_A 0x01 -#define LOG_REGION_B 0x02 -#define LOG_REGION_C 0x04 -#define LOG_REGION_D 0x08 - -/* -** Values for the third parameter to logLockRegion(). -*/ -#define LOG_UNLOCK 0 /* Unlock a range of bytes */ -#define LOG_RDLOCK 1 /* Put a SHARED lock on a range of bytes */ -#define LOG_WRLOCK 2 /* Put an EXCLUSIVE lock on a byte-range */ -#define LOG_WRLOCKW 3 /* Block on EXCLUSIVE lock on a byte-range */ - -/* -** A single instance of this structure is allocated as part of each -** connection to a database log. All structures associated with the -** same log file are linked together into a list using LogLock.pNext -** starting at LogSummary.pLock. -** -** The mLock field of the structure describes the locks (if any) -** currently held by the connection. If a SHARED lock is held on -** any of the four locking regions, then the associated LOG_REGION_X -** bit (see above) is set. If an EXCLUSIVE lock is held on the region, -** then the (LOG_REGION_X << 8) bit is set. -*/ -struct LogLock { - LogLock *pNext; /* Next lock on the same log */ - u32 mLock; /* Mask of locks */ -}; - -struct Log { - LogSummary *pSummary; /* Log file summary data */ - sqlite3_vfs *pVfs; /* The VFS used to create pFd */ - sqlite3_file *pFd; /* File handle for log file */ - int isLocked; /* Non-zero if a snapshot is held open */ - int isWriteLocked; /* True if this is the writer connection */ - u32 iCallback; /* Value to pass to log callback (or 0) */ - LogSummaryHdr hdr; /* Log summary header for current snapshot */ - LogLock lock; /* Lock held by this connection (if any) */ +struct Wal { + sqlite3_vfs *pVfs; /* The VFS used to create pFd */ + sqlite3_file *pFd; /* File handle for WAL file */ + u32 iCallback; /* Value to pass to log callback (or 0) */ + sqlite3_shm *pWIndex; /* The open wal-index file */ + int szWIndex; /* Size of the wal-index */ + u32 *pWiData; /* Pointer to wal-index content in memory */ + u8 lockState; /* SQLITE_SHM_xxxx constant showing lock state */ 
+ u8 readerType; /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */ + WalIndexHdr hdr; /* Wal-index for current snapshot */ }; @@ -292,16 +142,16 @@ struct Log { ** ** The internals of this structure are only accessed by: ** -** logIteratorInit() - Create a new iterator, -** logIteratorNext() - Step an iterator, -** logIteratorFree() - Free an iterator. +** walIteratorInit() - Create a new iterator, +** walIteratorNext() - Step an iterator, +** walIteratorFree() - Free an iterator. ** -** This functionality is used by the checkpoint code (see logCheckpoint()). +** This functionality is used by the checkpoint code (see walCheckpoint()). */ -struct LogIterator { - int nSegment; /* Size of LogIterator.aSegment[] array */ +struct WalIterator { + int nSegment; /* Size of WalIterator.aSegment[] array */ int nFinal; /* Elements in segment nSegment-1 */ - struct LogSegment { + struct WalSegment { int iNext; /* Next aIndex index */ u8 *aIndex; /* Pointer to index array */ u32 *aDbPage; /* Pointer to db page array */ @@ -309,15 +159,6 @@ struct LogIterator { }; - -/* -** List of all LogSummary objects created by this process. Protected by -** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex -** here instead of borrowing the LRU mutex. -*/ -#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU -static LogSummary *pLogSummary = 0; - /* ** Generate an 8 byte checksum based on the data in array aByte[] and the ** initial values of aCksum[0] and aCksum[1]. 
The checksum is written into @@ -336,7 +177,7 @@ static LogSummary *pLogSummary = 0; ** aCksum[0] = (u32)(aCksum[0] + (aCksum[0]>>24)); ** aCksum[1] = (u32)(aCksum[1] + (aCksum[1]>>24)); */ -static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){ +static void walChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){ u64 sum1 = aCksum[0]; u64 sum2 = aCksum[1]; u32 *a32 = (u32 *)aByte; @@ -366,84 +207,42 @@ static void logChecksumBytes(u8 *aByte, int nByte, u32 *aCksum){ } /* -** Argument zPath must be a nul-terminated string containing a path-name. -** This function modifies the string in-place by removing any "./" or "../" -** elements in the path. For example, the following input: +** Attempt to change the lock status. ** -** "/home/user/plans/good/../evil/./world_domination.txt" -** -** is overwritten with the 'normalized' version: -** -** "/home/user/plans/evil/world_domination.txt" +** When changing the lock status to SQLITE_SHM_READ, store the +** type of reader lock (either SQLITE_SHM_READ or SQLITE_SHM_READ_FULL) +** in pWal->readerType. */ -static void logNormalizePath(char *zPath){ - int i, j; - char *z = zPath; - int n = strlen(z); - - while( n>1 && z[n-1]=='/' ){ n--; } - for(i=j=0; i0 && z[j-1]!='/' ){ j--; } - if( j>0 ){ j--; } - i += 2; - continue; - } +static int walSetLock(Wal *pWal, int desiredStatus){ + int rc, got; + if( pWal->lockState==desiredStatus ) return SQLITE_OK; + rc = pWal->pVfs->xShmLock(pWal->pWIndex, desiredStatus, &got); + if( rc==SQLITE_OK ){ + pWal->lockState = desiredStatus; + if( desiredStatus==SQLITE_SHM_READ ){ + pWal->readerType = got; } - z[j++] = z[i]; - } - z[j] = 0; -} - -/* -** Unmap the log-summary mapping and close the file-descriptor. If -** the isTruncate argument is non-zero, truncate the log-summary file -** region to zero bytes. -** -** Regardless of the value of isTruncate, close the file-descriptor -** opened on the log-summary file. 
-*/ -static int logSummaryUnmap(LogSummary *pSummary, int isUnlink){ - int rc = SQLITE_OK; - if( pSummary->aData ){ - assert( pSummary->fd>0 ); - munmap(pSummary->aData, pSummary->nData); - pSummary->aData = 0; - if( isUnlink ){ - char *zFile = sqlite3_mprintf("%s-summary", pSummary->zPath); - if( !zFile ){ - rc = SQLITE_NOMEM; - } - unlink(zFile); - sqlite3_free(zFile); - } - } - if( pSummary->fd>0 ){ - close(pSummary->fd); - pSummary->fd = -1; } return rc; } -static void logSummaryWriteHdr(LogSummary *pSummary, LogSummaryHdr *pHdr){ - u32 *aHdr = pSummary->aData; /* Write header here */ - u32 *aCksum = &aHdr[LOGSUMMARY_HDR_NFIELD]; /* Write header cksum here */ +/* +** Update the header of the wal-index file. +*/ +static void walIndexWriteHdr(Wal *pWal, WalIndexHdr *pHdr){ + u32 *aHdr = pWal->pWiData; /* Write header here */ + u32 *aCksum = &aHdr[WALINDEX_HDR_NFIELD]; /* Write header cksum here */ - assert( LOGSUMMARY_HDR_NFIELD==sizeof(LogSummaryHdr)/4 ); - memcpy(aHdr, pHdr, sizeof(LogSummaryHdr)); + assert( WALINDEX_HDR_NFIELD==sizeof(WalIndexHdr)/4 ); + assert( aHdr!=0 ); + memcpy(aHdr, pHdr, sizeof(WalIndexHdr)); aCksum[0] = aCksum[1] = 1; - logChecksumBytes((u8 *)aHdr, sizeof(LogSummaryHdr), aCksum); + walChecksumBytes((u8 *)aHdr, sizeof(WalIndexHdr), aCksum); } /* ** This function encodes a single frame header and writes it to a buffer -** supplied by the caller. A log frame-header is made up of a series of +** supplied by the caller. A frame-header is made up of a series of ** 4-byte big-endian integers, as follows: ** ** 0: Database page size in bytes. @@ -452,7 +251,7 @@ static void logSummaryWriteHdr(LogSummary *pSummary, LogSummaryHdr *pHdr){ ** 12: Frame checksum 1. ** 16: Frame checksum 2. 
*/ -static void logEncodeFrame( +static void walEncodeFrame( u32 *aCksum, /* IN/OUT: Checksum values */ u32 iPage, /* Database page number for frame */ u32 nTruncate, /* New db size (or 0 for non-commit frames) */ @@ -460,13 +259,13 @@ static void logEncodeFrame( u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* OUT: Write encoded frame here */ ){ - assert( LOG_FRAME_HDRSIZE==16 ); + assert( WAL_FRAME_HDRSIZE==16 ); sqlite3Put4byte(&aFrame[0], iPage); sqlite3Put4byte(&aFrame[4], nTruncate); - logChecksumBytes(aFrame, 8, aCksum); - logChecksumBytes(aData, nData, aCksum); + walChecksumBytes(aFrame, 8, aCksum); + walChecksumBytes(aData, nData, aCksum); sqlite3Put4byte(&aFrame[8], aCksum[0]); sqlite3Put4byte(&aFrame[12], aCksum[1]); @@ -476,7 +275,7 @@ static void logEncodeFrame( ** Return 1 and populate *piPage, *pnTruncate and aCksum if the ** frame checksum looks Ok. Otherwise return 0. */ -static int logDecodeFrame( +static int walDecodeFrame( u32 *aCksum, /* IN/OUT: Checksum values */ u32 *piPage, /* OUT: Database page number for frame */ u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */ @@ -484,10 +283,10 @@ static int logDecodeFrame( u8 *aData, /* Pointer to page data (for checksum) */ u8 *aFrame /* Frame data */ ){ - assert( LOG_FRAME_HDRSIZE==16 ); + assert( WAL_FRAME_HDRSIZE==16 ); - logChecksumBytes(aFrame, 8, aCksum); - logChecksumBytes(aData, nData, aCksum); + walChecksumBytes(aFrame, 8, aCksum); + walChecksumBytes(aData, nData, aCksum); if( aCksum[0]!=sqlite3Get4byte(&aFrame[8]) || aCksum[1]!=sqlite3Get4byte(&aFrame[12]) @@ -501,8 +300,8 @@ static int logDecodeFrame( return 1; } -static void logMergesort8( - Pgno *aContent, /* Pages in log */ +static void walMergesort8( + Pgno *aContent, /* Pages in wal */ u8 *aBuffer, /* Buffer of at least *pnList items to use */ u8 *aList, /* IN/OUT: List to sort */ int *pnList /* IN/OUT: Number of elements in aList[] */ @@ -518,8 +317,8 @@ static void logMergesort8( int iOut = 0; /* Current 
index in output buffer */ /* TODO: Change to non-recursive version. */ - logMergesort8(aContent, aBuffer, aLeft, &nLeft); - logMergesort8(aContent, aBuffer, aRight, &nRight); + walMergesort8(aContent, aBuffer, aLeft, &nLeft); + walMergesort8(aContent, aBuffer, aRight, &nRight); while( iRightfd at pSummary->aData. If the summary file is smaller than -** nByte bytes in size when this function is called, ftruncate() is -** used to expand it before it is mapped. -** -** It is assumed that an exclusive lock is held on the summary file -** by the caller (to protect the ftruncate()). -*/ -static int logSummaryMap(LogSummary *pSummary, int nByte){ - struct stat sStat; - int rc; - int fd = pSummary->fd; - void *pMap; - - assert( pSummary->aData==0 ); - - /* If the file is less than nByte bytes in size, cause it to grow. */ - rc = fstat(fd, &sStat); - if( rc!=0 ) return SQLITE_IOERR; - if( sStat.st_sizeaData = (u32 *)pMap; - pSummary->nData = nByte/4; - - return SQLITE_OK; -} - -/* -** The log-summary file is already mapped to pSummary->aData[], but the -** mapping needs to be resized. Unmap and remap the file so that the mapping -** is at least nByte bytes in size, or the size of the entire file if it -** is larger than nByte bytes. -*/ -static int logSummaryRemap(LogSummary *pSummary, int nByte){ - int rc; - sqlite3_mutex_enter(pSummary->mutex); - munmap(pSummary->aData, pSummary->nData*4); - pSummary->aData = 0; - rc = logSummaryMap(pSummary, nByte); - sqlite3_mutex_leave(pSummary->mutex); - return rc; -} - -/* -** Return the index in the LogSummary.aData array that corresponds to -** frame iFrame. The log-summary file consists of a header, followed by +** Return the index in the WalIndex.aData array that corresponds to +** frame iFrame. The wal-index file consists of a header, followed by ** alternating "map" and "index" blocks. 
*/ -static int logSummaryEntry(u32 iFrame){ +static int walIndexEntry(u32 iFrame){ return ( - (LOGSUMMARY_LOCK_OFFSET+LOGSUMMARY_LOCK_RESERVED)/sizeof(u32) + (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32) + (((iFrame-1)>>8)<<6) /* Indexes that occur before iFrame */ + iFrame-1 /* Db page numbers that occur before iFrame */ ); } +/* +** Release our reference to the wal-index memory map. +*/ +static void walIndexUnmap(Wal *pWal){ + if( pWal->pWiData ){ + pWal->pVfs->xShmRelease(pWal->pWIndex); + pWal->pWiData = 0; + } +} /* -** Set an entry in the log-summary map to map log frame iFrame to db -** page iPage. Values are always appended to the log-summary (i.e. the +** Map the wal-index file into memory if it isn't already. +*/ +static int walIndexMap(Wal *pWal){ + int rc = SQLITE_OK; + if( pWal->pWiData==0 ){ + rc = pWal->pVfs->xShmSize(pWal->pWIndex, -1, + &pWal->szWIndex, (void**)(char*)&pWal->pWiData); + } + return rc; +} + +/* +** Resize the wal-index file. +*/ +static int walIndexRemap(Wal *pWal, int newSize){ + int rc; + walIndexUnmap(pWal); + rc = pWal->pVfs->xShmSize(pWal->pWIndex, newSize, + &pWal->szWIndex, (void**)(char*)&pWal->pWiData); + return rc; +} + +/* +** Increment by which to increase the wal-index file size. +*/ +#define WALINDEX_MMAP_INCREMENT (64*1024) + +/* +** Set an entry in the wal-index map to map log frame iFrame to db +** page iPage. Values are always appended to the wal-index (i.e. the ** value of iFrame is always exactly one more than the value passed to ** the previous call), but that restriction is not enforced or asserted ** here. 
*/ -static int logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){ - u32 iSlot = logSummaryEntry(iFrame); - - while( (iSlot+128)>=pSummary->nData ){ +static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ + u32 iSlot = walIndexEntry(iFrame); + + walIndexMap(pWal); + while( (iSlot+128)>=pWal->szWIndex ){ int rc; - int nByte = pSummary->nData*4 + LOGSUMMARY_MMAP_INCREMENT; + int nByte = pWal->szWIndex*4 + WALINDEX_MMAP_INCREMENT; - /* Unmap and remap the log-summary file. */ - rc = logSummaryRemap(pSummary, nByte); + /* Unmap and remap the wal-index file. */ + rc = walIndexRemap(pWal, nByte); if( rc!=SQLITE_OK ){ return rc; } } - /* Set the log-summary entry itself */ - pSummary->aData[iSlot] = iPage; + /* Set the wal-index entry itself */ + pWal->pWiData[iSlot] = iPage; /* If the frame number is a multiple of 256 (frames are numbered starting ** at 1), build an index of the most recently added 256 frames. @@ -657,13 +440,13 @@ static int logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){ u8 *aIndex; /* 256 bytes to build index in */ u8 *aTmp; /* Scratch space to use while sorting */ - aFrame = &pSummary->aData[iSlot-255]; - aIndex = (u8 *)&pSummary->aData[iSlot+1]; + aFrame = &pWal->pWiData[iSlot-255]; + aIndex = (u8 *)&pWal->pWiData[iSlot+1]; aTmp = &aIndex[256]; nIndex = 256; for(i=0; i<256; i++) aIndex[i] = (u8)i; - logMergesort8(aFrame, aTmp, aIndex, &nIndex); + walMergesort8(aFrame, aTmp, aIndex, &nIndex); memset(&aIndex[nIndex], aIndex[nIndex-1], 256-nIndex); } @@ -672,23 +455,24 @@ static int logSummaryAppend(LogSummary *pSummary, u32 iFrame, u32 iPage){ /* -** Recover the log-summary by reading the log file. The caller must hold -** an exclusive lock on the log-summary file. +** Recover the wal-index by reading the write-ahead log file. +** The caller must hold RECOVER lock on the wal-index file. 
*/ -static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ +static int walIndexRecover(Wal *pWal){ int rc; /* Return Code */ i64 nSize; /* Size of log file */ - LogSummaryHdr hdr; /* Recovered log-summary header */ + WalIndexHdr hdr; /* Recovered wal-index header */ + assert( pWal->lockState==SQLITE_SHM_RECOVER ); memset(&hdr, 0, sizeof(hdr)); - rc = sqlite3OsFileSize(pFd, &nSize); + rc = sqlite3OsFileSize(pWal->pFd, &nSize); if( rc!=SQLITE_OK ){ return rc; } - if( nSize>LOG_FRAME_HDRSIZE ){ - u8 aBuf[LOG_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ + if( nSize>WAL_FRAME_HDRSIZE ){ + u8 aBuf[WAL_FRAME_HDRSIZE]; /* Buffer to load first frame header into */ u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */ int nFrame; /* Number of bytes at aFrame */ u8 *aData; /* Pointer to data part of aFrame buffer */ @@ -700,7 +484,7 @@ static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ /* Read in the first frame header in the file (to determine the ** database page size). */ - rc = sqlite3OsRead(pFd, aBuf, LOG_HDRSIZE, 0); + rc = sqlite3OsRead(pWal->pFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } @@ -716,26 +500,26 @@ static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ aCksum[1] = sqlite3Get4byte(&aBuf[8]); /* Malloc a buffer to read frames into. */ - nFrame = nPgsz + LOG_FRAME_HDRSIZE; + nFrame = nPgsz + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc(nFrame); if( !aFrame ){ return SQLITE_NOMEM; } - aData = &aFrame[LOG_FRAME_HDRSIZE]; + aData = &aFrame[WAL_FRAME_HDRSIZE]; /* Read all frames from the log file. */ iFrame = 0; - for(iOffset=LOG_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ + for(iOffset=WAL_HDRSIZE; (iOffset+nFrame)<=nSize; iOffset+=nFrame){ u32 pgno; /* Database page number for frame */ u32 nTruncate; /* dbsize field from frame header */ int isValid; /* True if this frame is valid */ /* Read and decode the next log frame. 
*/ - rc = sqlite3OsRead(pFd, aFrame, nFrame, iOffset); + rc = sqlite3OsRead(pWal->pFd, aFrame, nFrame, iOffset); if( rc!=SQLITE_OK ) break; - isValid = logDecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame); + isValid = walDecodeFrame(aCksum, &pgno, &nTruncate, nPgsz, aData, aFrame); if( !isValid ) break; - logSummaryAppend(pSummary, ++iFrame, pgno); + walIndexAppend(pWal, ++iFrame, pgno); /* If nTruncate is non-zero, this is a commit record. */ if( nTruncate ){ @@ -754,268 +538,7 @@ static int logSummaryRecover(LogSummary *pSummary, sqlite3_file *pFd){ } finished: - logSummaryWriteHdr(pSummary, &hdr); - return rc; -} - -/* -** Place, modify or remove a lock on the log-summary file associated -** with pSummary. -** -** The locked byte-range should be inside the region dedicated to -** locking. This region of the log-summary file is never read or written. -*/ -static int logLockFd( - LogSummary *pSummary, /* The log-summary object to lock */ - int iStart, /* First byte to lock */ - int nByte, /* Number of bytes to lock */ - int op /* LOG_UNLOCK, RDLOCK, WRLOCK or WRLOCKW */ -){ - int aType[4] = { - F_UNLCK, /* LOG_UNLOCK */ - F_RDLCK, /* LOG_RDLOCK */ - F_WRLCK, /* LOG_WRLOCK */ - F_WRLCK /* LOG_WRLOCKW */ - }; - int aOp[4] = { - F_SETLK, /* LOG_UNLOCK */ - F_SETLK, /* LOG_RDLOCK */ - F_SETLK, /* LOG_WRLOCK */ - F_SETLKW /* LOG_WRLOCKW */ - }; - struct flock f; /* Locking operation */ - int rc; /* Value returned by fcntl() */ - - assert( ArraySize(aType)==ArraySize(aOp) ); - assert( op>=0 && op0 ); - assert( iStart>=LOGSUMMARY_LOCK_OFFSET - && iStart+nByte<=LOGSUMMARY_LOCK_OFFSET+LOGSUMMARY_LOCK_RESERVED - ); -#if defined(SQLITE_DEBUG) && defined(SQLITE_OS_UNIX) - if( pSummary->aData ) memset(&((u8*)pSummary->aData)[iStart], op, nByte); -#endif - - memset(&f, 0, sizeof(f)); - f.l_type = aType[op]; - f.l_whence = SEEK_SET; - f.l_start = iStart; - f.l_len = nByte; - rc = fcntl(pSummary->fd, aOp[op], &f); - return (rc==0) ? 
SQLITE_OK : SQLITE_BUSY; -} - -static int logLockRegion(Log *pLog, u32 mRegion, int op){ - LogSummary *pSummary = pLog->pSummary; - LogLock *p; /* Used to iterate through in-process locks */ - u32 mOther; /* Locks held by other connections */ - u32 mNew; /* New mask for pLog */ - - assert( - /* Writer lock operations */ - (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D)) - - /* Normal reader lock operations */ - || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B)) - - /* Region D reader lock operations */ - || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D)) - || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D)) - - /* Checkpointer lock operations */ - || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) - || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C)) - || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C)) - ); - - /* Assert that a connection never tries to go from an EXCLUSIVE to a - ** SHARED lock on a region. Moving from SHARED to EXCLUSIVE sometimes - ** happens though (when a region D reader upgrades to a writer). - */ - assert( op!=LOG_RDLOCK || 0==(pLog->lock.mLock & (mRegion<<8)) ); - - sqlite3_mutex_enter(pSummary->mutex); - - /* Calculate a mask of logs held by all connections in this process apart - ** from this one. The least significant byte of the mask contains a mask - ** of the SHARED logs held. The next least significant byte of the mask - ** indicates the EXCLUSIVE locks held. 
For example, to test if some other - ** connection is holding a SHARED lock on region A, or an EXCLUSIVE lock - ** on region C, do: - ** - ** hasSharedOnA = (mOther & (LOG_REGION_A<<0)); - ** hasExclusiveOnC = (mOther & (LOG_REGION_C<<8)); - ** - ** In all masks, if the bit in the EXCLUSIVE byte mask is set, so is the - ** corresponding bit in the SHARED mask. - */ - mOther = 0; - for(p=pSummary->pLock; p; p=p->pNext){ - assert( (p->mLock & (p->mLock<<8))==(p->mLock&0x0000FF00) ); - if( p!=&pLog->lock ){ - mOther |= p->mLock; - } - } - - /* If this call is to lock a region (not to unlock one), test if locks held - ** by any other connection in this process prevent the new locks from - ** begin granted. If so, exit the summary mutex and return SQLITE_BUSY. - */ - if( op && (mOther & (mRegion << (op==LOG_RDLOCK ? 8 : 0))) ){ - sqlite3_mutex_leave(pSummary->mutex); - return SQLITE_BUSY; - } - - /* Figure out the new log mask for this connection. */ - switch( op ){ - case LOG_UNLOCK: - mNew = (pLog->lock.mLock & ~(mRegion|(mRegion<<8))); - break; - case LOG_RDLOCK: - mNew = (pLog->lock.mLock | mRegion); - break; - default: - assert( op==LOG_WRLOCK ); - mNew = (pLog->lock.mLock | (mRegion<<8) | mRegion); - break; - } - - /* Now modify the locks held on the log-summary file descriptor. This - ** file descriptor is shared by all log connections in this process. - ** Therefore: - ** - ** + If one or more log connections in this process hold a SHARED lock - ** on a region, the file-descriptor should hold a SHARED lock on - ** the file region. - ** - ** + If a log connection in this process holds an EXCLUSIVE lock on a - ** region, the file-descriptor should also hold an EXCLUSIVE lock on - ** the region in question. - ** - ** If this is an LOG_UNLOCK operation, only regions for which no other - ** connection holds a lock should actually be unlocked. 
And if this - ** is a LOG_RDLOCK operation and other connections already hold all - ** the required SHARED locks, then no system call is required. - */ - if( op==LOG_UNLOCK ){ - mRegion = (mRegion & ~mOther); - } - if( (op==LOG_WRLOCK) - || (op==LOG_UNLOCK && mRegion) - || (op==LOG_RDLOCK && (mOther&mRegion)!=mRegion) - ){ - struct LockMap { - int iStart; /* Byte offset to start locking operation */ - int iLen; /* Length field for locking operation */ - } aMap[] = { - /* 0000 */ {0, 0}, /* 0001 */ {3+LOG_LOCK_REGION, 1}, - /* 0010 */ {2+LOG_LOCK_REGION, 1}, /* 0011 */ {2+LOG_LOCK_REGION, 2}, - /* 0100 */ {1+LOG_LOCK_REGION, 1}, /* 0101 */ {0, 0}, - /* 0110 */ {1+LOG_LOCK_REGION, 2}, /* 0111 */ {1+LOG_LOCK_REGION, 3}, - /* 1000 */ {0+LOG_LOCK_REGION, 1}, /* 1001 */ {0, 0}, - /* 1010 */ {0, 0}, /* 1011 */ {0, 0}, - /* 1100 */ {0+LOG_LOCK_REGION, 2}, /* 1101 */ {0, 0}, - /* 1110 */ {0, 0}, /* 1111 */ {0, 0} - }; - int rc; /* Return code of logLockFd() */ - - assert( mRegionmutex); - return rc; - } - } - - pLog->lock.mLock = mNew; - sqlite3_mutex_leave(pSummary->mutex); - return SQLITE_OK; -} - -/* -** Lock the DMH region, either with an EXCLUSIVE or SHARED lock. This -** function is never called with LOG_UNLOCK - the only way the DMH region -** is every completely unlocked is by by closing the file descriptor. -*/ -static int logLockDMH(LogSummary *pSummary, int eLock){ - assert( sqlite3_mutex_held(pSummary->mutex) ); - assert( eLock==LOG_RDLOCK || eLock==LOG_WRLOCK ); - return logLockFd(pSummary, LOG_LOCK_DMH, 1, eLock); -} - -/* -** Lock (or unlock) the MUTEX region. It is always locked using an -** EXCLUSIVE, blocking lock. -*/ -static int logLockMutex(LogSummary *pSummary, int eLock){ - assert( sqlite3_mutex_held(pSummary->mutex) ); - assert( eLock==LOG_WRLOCKW || eLock==LOG_UNLOCK ); - logLockFd(pSummary, LOG_LOCK_MUTEX, 1, eLock); - return SQLITE_OK; -} - -/* -** This function intializes the connection to the log-summary identified -** by struct pSummary. 
-*/ -static int logSummaryInit( - LogSummary *pSummary, /* Log summary object to initialize */ - sqlite3_file *pFd /* File descriptor open on log file */ -){ - int rc; /* Return Code */ - char *zFile; /* File name for summary file */ - - assert( pSummary->fd<0 ); - assert( pSummary->aData==0 ); - assert( pSummary->nRef>0 ); - assert( pSummary->zPath ); - - /* Open a file descriptor on the summary file. */ - zFile = sqlite3_mprintf("%s-summary", pSummary->zPath); - if( !zFile ){ - return SQLITE_NOMEM; - } - pSummary->fd = open(zFile, O_RDWR|O_CREAT, S_IWUSR|S_IRUSR); - sqlite3_free(zFile); - if( pSummary->fd<0 ){ - return SQLITE_IOERR; - } - - /* Grab an exclusive lock the summary file. Then mmap() it. - ** - ** TODO: This code needs to be enhanced to support a growable mapping. - ** For now, just make the mapping very large to start with. The - ** pages should not be allocated until they are first accessed anyhow, - ** so using a large mapping consumes no more resources than a smaller - ** one would. - */ - assert( sqlite3_mutex_held(pSummary->mutex) ); - rc = logLockMutex(pSummary, LOG_WRLOCKW); - if( rc!=SQLITE_OK ) return rc; - rc = logSummaryMap(pSummary, LOGSUMMARY_MMAP_INCREMENT); - if( rc!=SQLITE_OK ) goto out; - - /* Try to obtain an EXCLUSIVE lock on the dead-mans-hand region. If this - ** is possible, the contents of the log-summary file (if any) may not - ** be trusted. Zero the log-summary header before continuing. - */ - rc = logLockDMH(pSummary, LOG_WRLOCK); - if( rc==SQLITE_OK ){ - memset(pSummary->aData, 0, (LOGSUMMARY_HDR_NFIELD+2)*sizeof(u32) ); - } - rc = logLockDMH(pSummary, LOG_RDLOCK); - if( rc!=SQLITE_OK ){ - rc = SQLITE_IOERR; - } - - out: - logLockMutex(pSummary, LOG_UNLOCK); + walIndexWriteHdr(pWal, &hdr); return rc; } @@ -1027,112 +550,57 @@ static int logSummaryInit( ** ** A SHARED lock should be held on the database file when this function ** is called. 
The purpose of this SHARED lock is to prevent any other -** client from unlinking the log or log-summary file. If another process +** client from unlinking the log or wal-index file. If another process ** were to do this just after this client opened one of these files, the ** system would be badly broken. */ int sqlite3WalOpen( - sqlite3_vfs *pVfs, /* vfs module to open log file with */ + sqlite3_vfs *pVfs, /* vfs module to open wal and wal-index */ const char *zDb, /* Name of database file */ - Log **ppLog /* OUT: Allocated Log handle */ + Wal **ppWal /* OUT: Allocated Wal handle */ ){ int rc = SQLITE_OK; /* Return Code */ - Log *pRet; /* Object to allocate and return */ - LogSummary *pSummary = 0; /* Summary object */ - sqlite3_mutex *mutex = 0; /* LOG_SUMMARY_MUTEX mutex */ + Wal *pRet; /* Object to allocate and return */ int flags; /* Flags passed to OsOpen() */ char *zWal = 0; /* Path to WAL file */ int nWal; /* Length of zWal in bytes */ assert( zDb ); + if( pVfs->xShmOpen==0 ) return SQLITE_CANTOPEN; - /* Allocate an instance of struct Log to return. */ - *ppLog = 0; - pRet = (Log *)sqlite3MallocZero(sizeof(Log) + pVfs->szOsFile); - if( !pRet ) goto out; + /* Allocate an instance of struct Wal to return. */ + *ppWal = 0; + nWal = strlen(zDb); + pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile + nWal+11); + if( !pRet ) goto wal_open_out; pRet->pVfs = pVfs; pRet->pFd = (sqlite3_file *)&pRet[1]; + zWal = pVfs->szOsFile + (char*)pRet->pFd; + sqlite3_snprintf(nWal, zWal, "%s-wal-index", zDb); + rc = pVfs->xShmOpen(pVfs, zWal, &pRet->pWIndex); + if( rc ) goto wal_open_out; - /* Normalize the path name. */ - zWal = sqlite3_mprintf("%s-wal", zDb); - if( !zWal ) goto out; - logNormalizePath(zWal); + /* Open file handle on the write-ahead log file. 
*/ + zWal[nWal-6] = 0; flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL); - nWal = sqlite3Strlen30(zWal); + rc = sqlite3OsOpen(pVfs, zWal, pRet->pFd, flags, &flags); - /* Enter the mutex that protects the linked-list of LogSummary structures */ - if( sqlite3GlobalConfig.bCoreMutex ){ - mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); - } - sqlite3_mutex_enter(mutex); - - /* Search for an existing log summary object in the linked list. If one - ** cannot be found, allocate and initialize a new object. - */ - for(pSummary=pLogSummary; pSummary; pSummary=pSummary->pNext){ - int nPath = sqlite3Strlen30(pSummary->zPath); - if( nWal==nPath && 0==memcmp(pSummary->zPath, zWal, nPath) ) break; - } - if( !pSummary ){ - int nByte = sizeof(LogSummary) + nWal + 1; - pSummary = (LogSummary *)sqlite3MallocZero(nByte); - if( !pSummary ){ - rc = SQLITE_NOMEM; - goto out; - } - if( sqlite3GlobalConfig.bCoreMutex ){ - pSummary->mutex = sqlite3_mutex_alloc(SQLITE_MUTEX_RECURSIVE); - } - pSummary->zPath = (char *)&pSummary[1]; - pSummary->fd = -1; - memcpy(pSummary->zPath, zWal, nWal); - pSummary->pNext = pLogSummary; - pLogSummary = pSummary; - } - pSummary->nRef++; - pRet->pSummary = pSummary; - - /* Exit the mutex protecting the linked-list of LogSummary objects. */ - sqlite3_mutex_leave(mutex); - mutex = 0; - - /* Open file handle on the log file. */ - rc = sqlite3OsOpen(pVfs, pSummary->zPath, pRet->pFd, flags, &flags); - if( rc!=SQLITE_OK ) goto out; - - /* Object pSummary is shared between all connections to the database made - ** by this process. So at this point it may or may not be connected to - ** the log-summary. If it is not, connect it. 
- */ - sqlite3_mutex_enter(pSummary->mutex); - mutex = pSummary->mutex; - if( pSummary->fd<0 ){ - rc = logSummaryInit(pSummary, pRet->pFd); - } - - pRet->lock.pNext = pSummary->pLock; - pSummary->pLock = &pRet->lock; - - out: - sqlite3_mutex_leave(mutex); - sqlite3_free(zWal); +wal_open_out: if( rc!=SQLITE_OK ){ - assert(0); if( pRet ){ + pVfs->xShmClose(pRet->pWIndex); sqlite3OsClose(pRet->pFd); sqlite3_free(pRet); } - assert( !pSummary || pSummary->nRef==0 ); - sqlite3_free(pSummary); } - *ppLog = pRet; + *ppWal = pRet; return rc; } -static int logIteratorNext( - LogIterator *p, /* Iterator */ - u32 *piPage, /* OUT: Next db page to write */ - u32 *piFrame /* OUT: Log frame to read from */ +static int walIteratorNext( + WalIterator *p, /* Iterator */ + u32 *piPage, /* OUT: Next db page to write */ + u32 *piFrame /* OUT: Wal frame to read from */ ){ u32 iMin = *piPage; u32 iRet = 0xFFFFFFFF; @@ -1140,7 +608,7 @@ static int logIteratorNext( int nBlock = p->nFinal; for(i=p->nSegment-1; i>=0; i--){ - struct LogSegment *pSegment = &p->aSegment[i]; + struct WalSegment *pSegment = &p->aSegment[i]; while( pSegment->iNextaDbPage[pSegment->aIndex[pSegment->iNext]]; if( iPg>iMin ){ @@ -1160,23 +628,25 @@ static int logIteratorNext( return (iRet==0xFFFFFFFF); } -static LogIterator *logIteratorInit(Log *pLog){ - u32 *aData = pLog->pSummary->aData; - LogIterator *p; /* Return value */ +static WalIterator *walIteratorInit(Wal *pWal){ + u32 *aData; /* Content of the wal-index file */ + WalIterator *p; /* Return value */ int nSegment; /* Number of segments to merge */ u32 iLast; /* Last frame in log */ int nByte; /* Number of bytes to allocate */ int i; /* Iterator variable */ int nFinal; /* Number of unindexed entries */ - struct LogSegment *pFinal; /* Final (unindexed) segment */ + struct WalSegment *pFinal; /* Final (unindexed) segment */ u8 *aTmp; /* Temp space used by merge-sort */ - iLast = pLog->hdr.iLastPg; + walIndexMap(pWal); + aData = pWal->pWiData; + iLast = 
pWal->hdr.iLastPg; nSegment = (iLast >> 8) + 1; nFinal = (iLast & 0x000000FF); - nByte = sizeof(LogIterator) + (nSegment-1)*sizeof(struct LogSegment) + 512; - p = (LogIterator *)sqlite3_malloc(nByte); + nByte = sizeof(WalIterator) + (nSegment-1)*sizeof(struct WalSegment) + 512; + p = (WalIterator *)sqlite3_malloc(nByte); if( p ){ memset(p, 0, nByte); p->nSegment = nSegment; @@ -1184,63 +654,63 @@ static LogIterator *logIteratorInit(Log *pLog){ } for(i=0; iaSegment[i].aDbPage = &aData[logSummaryEntry(i*256+1)]; - p->aSegment[i].aIndex = (u8 *)&aData[logSummaryEntry(i*256+1)+256]; + p->aSegment[i].aDbPage = &aData[walIndexEntry(i*256+1)]; + p->aSegment[i].aIndex = (u8 *)&aData[walIndexEntry(i*256+1)+256]; } pFinal = &p->aSegment[nSegment-1]; - pFinal->aDbPage = &aData[logSummaryEntry((nSegment-1)*256+1)]; + pFinal->aDbPage = &aData[walIndexEntry((nSegment-1)*256+1)]; pFinal->aIndex = (u8 *)&pFinal[1]; aTmp = &pFinal->aIndex[256]; for(i=0; iaIndex[i] = i; } - logMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); + walMergesort8(pFinal->aDbPage, aTmp, pFinal->aIndex, &nFinal); p->nFinal = nFinal; return p; } /* -** Free a log iterator allocated by logIteratorInit(). +** Free a log iterator allocated by walIteratorInit(). */ -static void logIteratorFree(LogIterator *p){ +static void walIteratorFree(WalIterator *p){ sqlite3_free(p); } /* ** Checkpoint the contents of the log file. 
*/ -static int logCheckpoint( - Log *pLog, /* Log connection */ +static int walCheckpoint( + Wal *pWal, /* Wal connection */ sqlite3_file *pFd, /* File descriptor open on db file */ int sync_flags, /* Flags for OsSync() (or 0) */ u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ - int pgsz = pLog->hdr.pgsz; /* Database page-size */ - LogIterator *pIter = 0; /* Log iterator context */ + int pgsz = pWal->hdr.pgsz; /* Database page-size */ + WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ - u32 iFrame = 0; /* Log frame containing data for iDbpage */ + u32 iFrame = 0; /* Wal frame containing data for iDbpage */ - if( pLog->hdr.iLastPg==0 ){ + if( pWal->hdr.iLastPg==0 ){ return SQLITE_OK; } /* Allocate the iterator */ - pIter = logIteratorInit(pLog); + pIter = walIteratorInit(pWal); if( !pIter ) return SQLITE_NOMEM; /* Sync the log file to disk */ if( sync_flags ){ - rc = sqlite3OsSync(pLog->pFd, sync_flags); + rc = sqlite3OsSync(pWal->pFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } /* Iterate through the contents of the log, copying data to the db file. */ - while( 0==logIteratorNext(pIter, &iDbpage, &iFrame) ){ - rc = sqlite3OsRead(pLog->pFd, zBuf, pgsz, - logFrameOffset(iFrame, pgsz) + LOG_FRAME_HDRSIZE + while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + rc = sqlite3OsRead(pWal->pFd, zBuf, pgsz, + walFrameOffset(iFrame, pgsz) + WAL_FRAME_HDRSIZE ); if( rc!=SQLITE_OK ) goto out; rc = sqlite3OsWrite(pFd, zBuf, pgsz, (iDbpage-1)*pgsz); @@ -1248,18 +718,18 @@ static int logCheckpoint( } /* Truncate the database file */ - rc = sqlite3OsTruncate(pFd, ((i64)pLog->hdr.nPage*(i64)pgsz)); + rc = sqlite3OsTruncate(pFd, ((i64)pWal->hdr.nPage*(i64)pgsz)); if( rc!=SQLITE_OK ) goto out; - /* Sync the database file. If successful, update the log-summary. */ + /* Sync the database file. If successful, update the wal-index. 
*/ if( sync_flags ){ rc = sqlite3OsSync(pFd, sync_flags); if( rc!=SQLITE_OK ) goto out; } - pLog->hdr.iLastPg = 0; - pLog->hdr.iCheck1 = 2; - pLog->hdr.iCheck2 = 3; - logSummaryWriteHdr(pLog->pSummary, &pLog->hdr); + pWal->hdr.iLastPg = 0; + pWal->hdr.iCheck1 = 2; + pWal->hdr.iCheck2 = 3; + walIndexWriteHdr(pWal, &pWal->hdr); /* TODO: If a crash occurs and the current log is copied into the ** database there is no problem. However, if a crash occurs while @@ -1275,14 +745,14 @@ static int logCheckpoint( ** an unwelcome performance hit. Alternatives are... */ #if 0 - memset(zBuf, 0, LOG_FRAME_HDRSIZE); - rc = sqlite3OsWrite(pLog->pFd, zBuf, LOG_FRAME_HDRSIZE, 0); + memset(zBuf, 0, WAL_FRAME_HDRSIZE); + rc = sqlite3OsWrite(pWal->pFd, zBuf, WAL_FRAME_HDRSIZE, 0); if( rc!=SQLITE_OK ) goto out; - rc = sqlite3OsSync(pLog->pFd, pLog->sync_flags); + rc = sqlite3OsSync(pWal->pFd, pWal->sync_flags); #endif out: - logIteratorFree(pIter); + walIteratorFree(pIter); return rc; } @@ -1290,180 +760,92 @@ static int logCheckpoint( ** Close a connection to a log file. */ int sqlite3WalClose( - Log *pLog, /* Log to close */ + Wal *pWal, /* Wal to close */ sqlite3_file *pFd, /* Database file */ int sync_flags, /* Flags to pass to OsSync() (or 0) */ u8 *zBuf /* Buffer of at least page-size bytes */ ){ int rc = SQLITE_OK; - if( pLog ){ - LogLock **ppL; - LogSummary *pSummary = pLog->pSummary; - sqlite3_mutex *mutex = 0; - - sqlite3_mutex_enter(pSummary->mutex); - for(ppL=&pSummary->pLock; *ppL!=&pLog->lock; ppL=&(*ppL)->pNext); - *ppL = pLog->lock.pNext; - sqlite3_mutex_leave(pSummary->mutex); - - if( sqlite3GlobalConfig.bCoreMutex ){ - mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX); - } - sqlite3_mutex_enter(mutex); - - /* Decrement the reference count on the log summary. If this is the last - ** reference to the log summary object in this process, the object will - ** be freed. 
If this is also the last connection to the database, then - ** checkpoint the database and truncate the log and log-summary files - ** to zero bytes in size. - **/ - pSummary->nRef--; - if( pSummary->nRef==0 ){ - int rc; - LogSummary **pp; - for(pp=&pLogSummary; *pp!=pSummary; pp=&(*pp)->pNext); - *pp = (*pp)->pNext; - - sqlite3_mutex_leave(mutex); - - rc = sqlite3OsLock(pFd, SQLITE_LOCK_EXCLUSIVE); - if( rc==SQLITE_OK ){ - - /* This is the last connection to the database (including other - ** processes). Do three things: - ** - ** 1. Checkpoint the db. - ** 2. Truncate the log file. - ** 3. Unlink the log-summary file. - */ - rc = logCheckpoint(pLog, pFd, sync_flags, zBuf); - if( rc==SQLITE_OK ){ - rc = sqlite3OsDelete(pLog->pVfs, pSummary->zPath, 0); - } - - logSummaryUnmap(pSummary, 1); - }else{ - if( rc==SQLITE_BUSY ){ - rc = SQLITE_OK; - } - logSummaryUnmap(pSummary, 0); - } - - sqlite3_mutex_free(pSummary->mutex); - sqlite3_free(pSummary); - }else{ - sqlite3_mutex_leave(mutex); - } - - /* Close the connection to the log file and free the Log handle. */ - sqlite3OsClose(pLog->pFd); - sqlite3_free(pLog); + if( pWal ){ + pWal->pVfs->xShmClose(pWal->pWIndex); + sqlite3OsClose(pWal->pFd); + sqlite3_free(pWal); } return rc; } /* -** Enter and leave the log-summary mutex. In this context, entering the -** log-summary mutex means: -** -** 1. Obtaining mutex pLog->pSummary->mutex, and -** 2. Taking an exclusive lock on the log-summary file. -** -** i.e. this mutex locks out other processes as well as other threads -** hosted in this address space. 
-*/ -static int logEnterMutex(Log *pLog){ - LogSummary *pSummary = pLog->pSummary; - int rc; - - sqlite3_mutex_enter(pSummary->mutex); - rc = logLockMutex(pSummary, LOG_WRLOCKW); - if( rc!=SQLITE_OK ){ - sqlite3_mutex_leave(pSummary->mutex); - } - return rc; -} -static void logLeaveMutex(Log *pLog){ - LogSummary *pSummary = pLog->pSummary; - logLockMutex(pSummary, LOG_UNLOCK); - sqlite3_mutex_leave(pSummary->mutex); -} - -/* -** Try to read the log-summary header. Attempt to verify the header -** checksum. If the checksum can be verified, copy the log-summary -** header into structure pLog->hdr. If the contents of pLog->hdr are +** Try to read the wal-index header. Attempt to verify the header +** checksum. If the checksum can be verified, copy the wal-index +** header into structure pWal->hdr. If the contents of pWal->hdr are ** modified by this and pChanged is not NULL, set *pChanged to 1. ** Otherwise leave *pChanged unmodified. ** ** If the checksum cannot be verified return SQLITE_ERROR. */ -int logSummaryTryHdr(Log *pLog, int *pChanged){ +int walIndexTryHdr(Wal *pWal, int *pChanged){ u32 aCksum[2] = {1, 1}; - u32 aHdr[LOGSUMMARY_HDR_NFIELD+2]; + u32 aHdr[WALINDEX_HDR_NFIELD+2]; - /* Read the header. The caller may or may not have locked the log-summary + /* Read the header. The caller may or may not have locked the wal-index ** file, meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return SQLITE_ERROR. The caller will ** retry. Or, if the caller has already locked the file and the header ** still looks inconsistent, it will run recovery. 
*/ - memcpy(aHdr, pLog->pSummary->aData, sizeof(aHdr)); - logChecksumBytes((u8*)aHdr, sizeof(u32)*LOGSUMMARY_HDR_NFIELD, aCksum); - if( aCksum[0]!=aHdr[LOGSUMMARY_HDR_NFIELD] - || aCksum[1]!=aHdr[LOGSUMMARY_HDR_NFIELD+1] + memcpy(aHdr, pWal->pWiData, sizeof(aHdr)); + walChecksumBytes((u8*)aHdr, sizeof(u32)*WALINDEX_HDR_NFIELD, aCksum); + if( aCksum[0]!=aHdr[WALINDEX_HDR_NFIELD] + || aCksum[1]!=aHdr[WALINDEX_HDR_NFIELD+1] ){ return SQLITE_ERROR; } - if( memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)) ){ + if( memcmp(&pWal->hdr, aHdr, sizeof(WalIndexHdr)) ){ if( pChanged ){ *pChanged = 1; } - memcpy(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)); + memcpy(&pWal->hdr, aHdr, sizeof(WalIndexHdr)); } return SQLITE_OK; } /* -** Read the log-summary header from the log-summary file into structure -** pLog->hdr. If attempting to verify the header checksum fails, try +** Read the wal-index header from the wal-index file into structure +** pWal->hdr. If attempting to verify the header checksum fails, try ** to recover the log before returning. ** -** If the log-summary header is successfully read, return SQLITE_OK. +** If the wal-index header is successfully read, return SQLITE_OK. ** Otherwise an SQLite error code. */ -int logSummaryReadHdr(Log *pLog, int *pChanged){ +static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; + assert( pWal->lockState==SQLITE_SHM_READ ); + walIndexMap(pWal); + /* First try to read the header without a lock. Verify the checksum ** before returning. This will almost always work. - ** - ** TODO: Doing this causes a race-condition with the code that resizes - ** the mapping. Unless Log.pSummary->mutex is held, it is possible that - ** LogSummary.aData is invalid. 
*/ -#if 0 - if( SQLITE_OK==logSummaryTryHdr(pLog, pChanged) ){ + if( SQLITE_OK==walIndexTryHdr(pWal, pChanged) ){ return SQLITE_OK; } -#endif - /* If the first attempt to read the header failed, lock the log-summary + /* If the first attempt to read the header failed, lock the wal-index ** file and try again. If the header checksum verification fails this ** time as well, run log recovery. */ - if( SQLITE_OK==(rc = logEnterMutex(pLog)) ){ - if( SQLITE_OK!=logSummaryTryHdr(pLog, pChanged) ){ + if( SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) ){ + if( SQLITE_OK!=walIndexTryHdr(pWal, pChanged) ){ if( pChanged ){ *pChanged = 1; } - rc = logSummaryRecover(pLog->pSummary, pLog->pFd); + rc = walIndexRecover(pWal); if( rc==SQLITE_OK ){ - rc = logSummaryTryHdr(pLog, 0); + rc = walIndexTryHdr(pWal, 0); } } - logLeaveMutex(pLog); + walSetLock(pWal, SQLITE_SHM_READ); } return rc; @@ -1473,57 +855,29 @@ int logSummaryReadHdr(Log *pLog, int *pChanged){ ** Lock a snapshot. ** ** If this call obtains a new read-lock and the database contents have been -** modified since the most recent call to LogCloseSnapshot() on this Log +** modified since the most recent call to WalCloseSnapshot() on this Wal ** connection, then *pChanged is set to 1 before returning. Otherwise, it ** is left unmodified. This is used by the pager layer to determine whether ** or not any cached pages may be safely reused. */ -int sqlite3WalOpenSnapshot(Log *pLog, int *pChanged){ - int rc = SQLITE_OK; - if( pLog->isLocked==0 ){ - int nAttempt; +int sqlite3WalOpenSnapshot(Wal *pWal, int *pChanged){ + int rc; - /* Obtain a snapshot-lock on the log-summary file. The procedure - ** for obtaining the snapshot log is: - ** - ** 1. Attempt a SHARED lock on regions A and B. - ** 2a. If step 1 is successful, drop the lock on region B. - ** 2b. If step 1 is unsuccessful, attempt a SHARED lock on region D. - ** 3. Repeat the above until the lock attempt in step 1 or 2b is - ** successful. 
- ** - ** If neither of the locks can be obtained after 5 tries, presumably - ** something is wrong (i.e. a process not following the locking protocol). - ** Return an error code in this case. - */ - rc = SQLITE_BUSY; - for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){ - rc = logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B, LOG_RDLOCK); - if( rc==SQLITE_BUSY ){ - rc = logLockRegion(pLog, LOG_REGION_D, LOG_RDLOCK); - if( rc==SQLITE_OK ) pLog->isLocked = LOG_REGION_D; - }else{ - logLockRegion(pLog, LOG_REGION_B, LOG_UNLOCK); - pLog->isLocked = LOG_REGION_A; - } - } - if( rc!=SQLITE_OK ){ - return rc; - } + rc = walSetLock(pWal, SQLITE_SHM_READ); + if( rc==SQLITE_OK ){ + pWal->lockState = SQLITE_SHM_READ; - rc = logSummaryReadHdr(pLog, pChanged); + rc = walIndexReadHdr(pWal, pChanged); if( rc!=SQLITE_OK ){ /* An error occured while attempting log recovery. */ - sqlite3WalCloseSnapshot(pLog); + sqlite3WalCloseSnapshot(pWal); }else{ /* Check if the mapping needs to grow. */ - LogSummary *pSummary = pLog->pSummary; - - if( pLog->hdr.iLastPg - && logSummaryEntry(pLog->hdr.iLastPg)>=pSummary->nData - ){ - rc = logSummaryRemap(pSummary, 0); - assert( rc || logSummaryEntry(pLog->hdr.iLastPg)nData ); + if( pWal->hdr.iLastPg + && walIndexEntry(pWal->hdr.iLastPg)>=pWal->szWIndex + ){ + rc = walIndexRemap(pWal, 0); + assert( rc || walIndexEntry(pWal->hdr.iLastPg)szWIndex ); } } } @@ -1533,41 +887,39 @@ int sqlite3WalOpenSnapshot(Log *pLog, int *pChanged){ /* ** Unlock the current snapshot. */ -void sqlite3WalCloseSnapshot(Log *pLog){ - if( pLog->isLocked ){ - assert( pLog->isLocked==LOG_REGION_A || pLog->isLocked==LOG_REGION_D ); - logLockRegion(pLog, pLog->isLocked, LOG_UNLOCK); +void sqlite3WalCloseSnapshot(Wal *pWal){ + if( pWal->lockState!=SQLITE_SHM_UNLOCK ){ + assert( pWal->lockState==SQLITE_SHM_READ ); + walSetLock(pWal, SQLITE_SHM_UNLOCK); } - pLog->isLocked = 0; } /* ** Read a page from the log, if it is present. 
*/ -int sqlite3WalRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ - LogSummary *pSummary = pLog->pSummary; +int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut){ u32 iRead = 0; u32 *aData; - int iFrame = (pLog->hdr.iLastPg & 0xFFFFFF00); + int iFrame = (pWal->hdr.iLastPg & 0xFFFFFF00); - assert( pLog->isLocked ); - sqlite3_mutex_enter(pSummary->mutex); + assert( pWal->lockState==SQLITE_SHM_READ ); + walIndexMap(pWal); /* Do a linear search of the unindexed block of page-numbers (if any) - ** at the end of the log-summary. An alternative to this would be to + ** at the end of the wal-index. An alternative to this would be to ** build an index in private memory each time a read transaction is ** opened on a new snapshot. */ - aData = pSummary->aData; - if( pLog->hdr.iLastPg ){ - u32 *pi = &aData[logSummaryEntry(pLog->hdr.iLastPg)]; - u32 *piStop = pi - (pLog->hdr.iLastPg & 0xFF); + aData = pWal->pWiData; + if( pWal->hdr.iLastPg ){ + u32 *pi = &aData[walIndexEntry(pWal->hdr.iLastPg)]; + u32 *piStop = pi - (pWal->hdr.iLastPg & 0xFF); while( *pi!=pgno && pi!=piStop ) pi--; if( pi!=piStop ){ iRead = (pi-piStop) + iFrame; } } - assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno ); + assert( iRead==0 || aData[walIndexEntry(iRead)]==pgno ); while( iRead==0 && iFrame>0 ){ int iLow = 0; @@ -1576,7 +928,7 @@ int sqlite3WalRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ u8 *aIndex; iFrame -= 256; - aFrame = &aData[logSummaryEntry(iFrame+1)]; + aFrame = &aData[walIndexEntry(iFrame+1)]; aIndex = (u8 *)&aFrame[256]; while( iLow<=iHigh ){ @@ -1594,20 +946,19 @@ int sqlite3WalRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ } } } - assert( iRead==0 || aData[logSummaryEntry(iRead)]==pgno ); - - sqlite3_mutex_leave(pLog->pSummary->mutex); + assert( iRead==0 || aData[walIndexEntry(iRead)]==pgno ); + walIndexUnmap(pWal); /* If iRead is non-zero, then it is the log frame number that contains the ** required page. Read and return data from the log file. 
*/ if( iRead ){ - i64 iOffset = logFrameOffset(iRead, pLog->hdr.pgsz) + LOG_FRAME_HDRSIZE; - *pInLog = 1; - return sqlite3OsRead(pLog->pFd, pOut, pLog->hdr.pgsz, iOffset); + i64 iOffset = walFrameOffset(iRead, pWal->hdr.pgsz) + WAL_FRAME_HDRSIZE; + *pInWal = 1; + return sqlite3OsRead(pWal->pFd, pOut, pWal->hdr.pgsz, iOffset); } - *pInLog = 0; + *pInWal = 0; return SQLITE_OK; } @@ -1615,9 +966,10 @@ int sqlite3WalRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut){ /* ** Set *pPgno to the size of the database file (or zero, if unknown). */ -void sqlite3WalDbsize(Log *pLog, Pgno *pPgno){ - assert( pLog->isLocked ); - *pPgno = pLog->hdr.nPage; +void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ + assert( pWal->lockState==SQLITE_SHM_READ + || pWal->lockState==SQLITE_SHM_WRITE ); + *pPgno = pWal->hdr.nPage; } /* @@ -1625,54 +977,19 @@ void sqlite3WalDbsize(Log *pLog, Pgno *pPgno){ ** Otherwise, if the caller is operating on a snapshot that has already ** been overwritten by another writer, SQLITE_BUSY is returned. */ -int sqlite3WalWriteLock(Log *pLog, int op){ - assert( pLog->isLocked ); +int sqlite3WalWriteLock(Wal *pWal, int op){ + int rc; if( op ){ - - /* Obtain the writer lock */ - int rc = logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_WRLOCK); - if( rc!=SQLITE_OK ){ - return rc; - } - - /* If this is connection is a region D reader, then the SHARED lock on - ** region D has just been upgraded to EXCLUSIVE. But no lock at all is - ** held on region A. This means that if the write-transaction is committed - ** and this connection downgrades to a reader, it will be left with no - ** lock at all. And so its snapshot could get clobbered by a checkpoint - ** operation. - ** - ** To stop this from happening, grab a SHARED lock on region A now. - ** This should always be successful, as the only time a client holds - ** an EXCLUSIVE lock on region A, it must also be holding an EXCLUSIVE - ** lock on region C (a checkpointer does this). 
This is not possible, - ** as this connection currently has the EXCLUSIVE lock on region C. - */ - if( pLog->isLocked==LOG_REGION_D ){ - logLockRegion(pLog, LOG_REGION_A, LOG_RDLOCK); - pLog->isLocked = LOG_REGION_A; - } - - /* If this connection is not reading the most recent database snapshot, - ** it is not possible to write to the database. In this case release - ** the write locks and return SQLITE_BUSY. - */ - if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){ - logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK); - return SQLITE_BUSY; - } - pLog->isWriteLocked = 1; - - }else if( pLog->isWriteLocked ){ - logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK); - memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)); - pLog->isWriteLocked = 0; + assert( pWal->lockState == SQLITE_SHM_READ ); + rc = walSetLock(pWal, SQLITE_SHM_WRITE); + }else if( pWal->lockState==SQLITE_SHM_WRITE ){ + rc = walSetLock(pWal, SQLITE_SHM_READ); } - return SQLITE_OK; + return rc; } /* -** The log handle passed to this function must be holding the write-lock. +** The Wal object passed to this function must be holding the write-lock. ** ** If any data has been written (but not committed) to the log file, this ** function moves the write-pointer back to the start of the transaction. @@ -1685,35 +1002,42 @@ int sqlite3WalWriteLock(Log *pLog, int op){ ** Otherwise, if the callback function does not return an error, this ** function returns SQLITE_OK. 
*/ -int sqlite3WalUndo(Log *pLog, int (*xUndo)(void *, Pgno), void *pUndoCtx){ +int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; - Pgno iMax = pLog->hdr.iLastPg; + Pgno iMax = pWal->hdr.iLastPg; Pgno iFrame; - assert( pLog->isWriteLocked ); - logSummaryReadHdr(pLog, 0); - for(iFrame=pLog->hdr.iLastPg+1; iFrame<=iMax && rc==SQLITE_OK; iFrame++){ - rc = xUndo(pUndoCtx, pLog->pSummary->aData[logSummaryEntry(iFrame)]); + assert( pWal->lockState==SQLITE_SHM_WRITE ); + walIndexReadHdr(pWal, 0); + for(iFrame=pWal->hdr.iLastPg+1; iFrame<=iMax && rc==SQLITE_OK; iFrame++){ + rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); } + walIndexUnmap(pWal); return rc; } -u32 sqlite3WalSavepoint(Log *pLog){ - assert( pLog->isWriteLocked ); - return pLog->hdr.iLastPg; +/* Return an integer that records the current (uncommitted) write +** position in the WAL +*/ +u32 sqlite3WalSavepoint(Wal *pWal){ + assert( pWal->lockState==SQLITE_SHM_WRITE ); + return pWal->hdr.iLastPg; } -int sqlite3WalSavepointUndo(Log *pLog, u32 iFrame){ +/* Move the write position of the WAL back to iFrame. Called in +** response to a ROLLBACK TO command. 
+*/ +int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){ int rc = SQLITE_OK; u8 aCksum[8]; - assert( pLog->isWriteLocked ); + assert( pWal->lockState==SQLITE_SHM_WRITE ); - pLog->hdr.iLastPg = iFrame; + pWal->hdr.iLastPg = iFrame; if( iFrame>0 ){ - i64 iOffset = logFrameOffset(iFrame, pLog->hdr.pgsz) + sizeof(u32)*2; - rc = sqlite3OsRead(pLog->pFd, aCksum, sizeof(aCksum), iOffset); - pLog->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]); - pLog->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]); + i64 iOffset = walFrameOffset(iFrame, pWal->hdr.pgsz) + sizeof(u32)*2; + rc = sqlite3OsRead(pWal->pFd, aCksum, sizeof(aCksum), iOffset); + pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]); + pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]); } return rc; @@ -1722,9 +1046,9 @@ int sqlite3WalSavepointUndo(Log *pLog, u32 iFrame){ /* ** Return true if data has been written but not committed to the log file. */ -int sqlite3WalDirty(Log *pLog){ - assert( pLog->isWriteLocked ); - return( pLog->hdr.iLastPg!=((LogSummaryHdr*)pLog->pSummary->aData)->iLastPg ); +int sqlite3WalDirty(Wal *pWal){ + assert( pWal->lockState==SQLITE_SHM_WRITE ); + return( pWal->hdr.iLastPg!=((WalIndexHdr*)pWal->pWiData)->iLastPg ); } /* @@ -1732,7 +1056,7 @@ int sqlite3WalDirty(Log *pLog){ ** on the log file (obtained using sqlite3WalWriteLock()). */ int sqlite3WalFrames( - Log *pLog, /* Log handle to write to */ + Wal *pWal, /* Wal handle to write to */ int nPgsz, /* Database page-size in bytes */ PgHdr *pList, /* List of dirty pages to write */ Pgno nTruncate, /* Database size after this commit */ @@ -1741,52 +1065,53 @@ int sqlite3WalFrames( ){ int rc; /* Used to catch return codes */ u32 iFrame; /* Next frame address */ - u8 aFrame[LOG_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ + u8 aFrame[WAL_FRAME_HDRSIZE]; /* Buffer to assemble frame-header in */ PgHdr *p; /* Iterator to run through pList with. 
*/ u32 aCksum[2]; /* Checksums */ PgHdr *pLast; /* Last frame in list */ int nLast = 0; /* Number of extra copies of last page */ - assert( LOG_FRAME_HDRSIZE==(4 * 2 + 2*sizeof(u32)) ); + assert( WAL_FRAME_HDRSIZE==(4 * 2 + 2*sizeof(u32)) ); assert( pList ); + assert( pWal->lockState==SQLITE_SHM_WRITE ); /* If this is the first frame written into the log, write the log ** header to the start of the log file. See comments at the top of ** this file for a description of the log-header format. */ - assert( LOG_FRAME_HDRSIZE>=LOG_HDRSIZE ); - iFrame = pLog->hdr.iLastPg; + assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE ); + iFrame = pWal->hdr.iLastPg; if( iFrame==0 ){ sqlite3Put4byte(aFrame, nPgsz); sqlite3_randomness(8, &aFrame[4]); - pLog->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); - pLog->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); - rc = sqlite3OsWrite(pLog->pFd, aFrame, LOG_HDRSIZE, 0); + pWal->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]); + pWal->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]); + rc = sqlite3OsWrite(pWal->pFd, aFrame, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ return rc; } } - aCksum[0] = pLog->hdr.iCheck1; - aCksum[1] = pLog->hdr.iCheck2; + aCksum[0] = pWal->hdr.iCheck1; + aCksum[1] = pWal->hdr.iCheck2; /* Write the log file. */ for(p=pList; p; p=p->pDirty){ u32 nDbsize; /* Db-size field for frame header */ i64 iOffset; /* Write offset in log file */ - iOffset = logFrameOffset(++iFrame, nPgsz); + iOffset = walFrameOffset(++iFrame, nPgsz); /* Populate and write the frame header */ nDbsize = (isCommit && p->pDirty==0) ? 
nTruncate : 0; - logEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); - rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset); + walEncodeFrame(aCksum, p->pgno, nDbsize, nPgsz, p->pData, aFrame); + rc = sqlite3OsWrite(pWal->pFd, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } /* Write the page data */ - rc = sqlite3OsWrite(pLog->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame)); + rc = sqlite3OsWrite(pWal->pFd, p->pData, nPgsz, iOffset + sizeof(aFrame)); if( rc!=SQLITE_OK ){ return rc; } @@ -1795,8 +1120,8 @@ int sqlite3WalFrames( /* Sync the log file if the 'isSync' flag was specified. */ if( sync_flags ){ - i64 iSegment = sqlite3OsSectorSize(pLog->pFd); - i64 iOffset = logFrameOffset(iFrame+1, nPgsz); + i64 iSegment = sqlite3OsSectorSize(pWal->pFd); + i64 iOffset = walFrameOffset(iFrame+1, nPgsz); assert( isCommit ); @@ -1805,14 +1130,14 @@ int sqlite3WalFrames( } iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment); while( iOffsetpgno,nTruncate,nPgsz,pLast->pData,aFrame); - rc = sqlite3OsWrite(pLog->pFd, aFrame, sizeof(aFrame), iOffset); + walEncodeFrame(aCksum,pLast->pgno,nTruncate,nPgsz,pLast->pData,aFrame); + rc = sqlite3OsWrite(pWal->pFd, aFrame, sizeof(aFrame), iOffset); if( rc!=SQLITE_OK ){ return rc; } - iOffset += LOG_FRAME_HDRSIZE; - rc = sqlite3OsWrite(pLog->pFd, pLast->pData, nPgsz, iOffset); + iOffset += WAL_FRAME_HDRSIZE; + rc = sqlite3OsWrite(pWal->pFd, pLast->pData, nPgsz, iOffset); if( rc!=SQLITE_OK ){ return rc; } @@ -1820,44 +1145,44 @@ int sqlite3WalFrames( iOffset += nPgsz; } - rc = sqlite3OsSync(pLog->pFd, sync_flags); + rc = sqlite3OsSync(pWal->pFd, sync_flags); if( rc!=SQLITE_OK ){ return rc; } } /* Append data to the log summary. 
It is not necessary to lock the - ** log-summary to do this as the RESERVED lock held on the db file + ** wal-index to do this as the RESERVED lock held on the db file ** guarantees that there are no other writers, and no data that may ** be in use by existing readers is being overwritten. */ - iFrame = pLog->hdr.iLastPg; + iFrame = pWal->hdr.iLastPg; for(p=pList; p; p=p->pDirty){ iFrame++; - logSummaryAppend(pLog->pSummary, iFrame, p->pgno); + walIndexAppend(pWal, iFrame, p->pgno); } while( nLast>0 ){ iFrame++; nLast--; - logSummaryAppend(pLog->pSummary, iFrame, pLast->pgno); + walIndexAppend(pWal, iFrame, pLast->pgno); } /* Update the private copy of the header. */ - pLog->hdr.pgsz = nPgsz; - pLog->hdr.iLastPg = iFrame; + pWal->hdr.pgsz = nPgsz; + pWal->hdr.iLastPg = iFrame; if( isCommit ){ - pLog->hdr.iChange++; - pLog->hdr.nPage = nTruncate; + pWal->hdr.iChange++; + pWal->hdr.nPage = nTruncate; } - pLog->hdr.iCheck1 = aCksum[0]; - pLog->hdr.iCheck2 = aCksum[1]; + pWal->hdr.iCheck1 = aCksum[0]; + pWal->hdr.iCheck2 = aCksum[1]; - /* If this is a commit, update the log-summary header too. */ - if( isCommit && SQLITE_OK==(rc = logEnterMutex(pLog)) ){ - logSummaryWriteHdr(pLog->pSummary, &pLog->hdr); - logLeaveMutex(pLog); - pLog->iCallback = iFrame; + /* If this is a commit, update the wal-index header too. */ + if( isCommit ){ + walIndexWriteHdr(pWal, &pWal->hdr); + pWal->iCallback = iFrame; } + walIndexUnmap(pWal); return rc; } @@ -1865,14 +1190,13 @@ int sqlite3WalFrames( /* ** Checkpoint the database: ** -** 1. Wait for an EXCLUSIVE lock on regions B and C. -** 2. Wait for an EXCLUSIVE lock on region A. -** 3. Copy the contents of the log into the database file. -** 4. Zero the log-summary header (so new readers will ignore the log). -** 5. Drop the locks obtained in steps 1 and 2. +** 1. Acquire a CHECKPOINT lock +** 2. Copy the contents of the log into the database file. +** 3. Zero the wal-index header (so new readers will ignore the log). +** 4. 
Drop the CHECKPOINT lock. */ int sqlite3WalCheckpoint( - Log *pLog, /* Log connection */ + Wal *pWal, /* Wal connection */ sqlite3_file *pFd, /* File descriptor open on db file */ int sync_flags, /* Flags to sync db file with (or 0) */ u8 *zBuf, /* Temporary buffer to use */ @@ -1882,48 +1206,47 @@ int sqlite3WalCheckpoint( int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ - assert( !pLog->isLocked ); + assert( pWal->lockState==SQLITE_SHM_UNLOCK ); - /* Wait for an EXCLUSIVE lock on regions B and C. */ + /* Get the CHECKPOINT lock */ do { - rc = logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_WRLOCK); - }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); - if( rc!=SQLITE_OK ) return rc; - - /* Wait for an EXCLUSIVE lock on region A. */ - do { - rc = logLockRegion(pLog, LOG_REGION_A, LOG_WRLOCK); + rc = walSetLock(pWal, SQLITE_SHM_CHECKPOINT); }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); if( rc!=SQLITE_OK ){ - logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK); + walSetLock(pWal, SQLITE_SHM_UNLOCK); return rc; } /* Copy data from the log to the database file. */ - rc = logSummaryReadHdr(pLog, &isChanged); + rc = walIndexReadHdr(pWal, &isChanged); if( rc==SQLITE_OK ){ - rc = logCheckpoint(pLog, pFd, sync_flags, zBuf); + rc = walCheckpoint(pWal, pFd, sync_flags, zBuf); } if( isChanged ){ /* If a new wal-index header was loaded before the checkpoint was - ** performed, then the pager-cache associated with log pLog is now + ** performed, then the pager-cache associated with log pWal is now ** out of date. So zero the cached wal-index header to ensure that ** next time the pager opens a snapshot on this database it knows that ** the cache needs to be reset. */ - memset(&pLog->hdr, 0, sizeof(LogSummaryHdr)); + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); } /* Release the locks. 
*/ - logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK); + walSetLock(pWal, SQLITE_SHM_UNLOCK); return rc; } -int sqlite3WalCallback(Log *pLog){ +/* Return the value to pass to a sqlite3_wal_hook callback, the +** number of frames in the WAL at the point of the last commit since +** sqlite3WalCallback() was called. If no commits have occurred since +** the last call, then return 0. +*/ +int sqlite3WalCallback(Wal *pWal){ u32 ret = 0; - if( pLog ){ - ret = pLog->iCallback; - pLog->iCallback = 0; + if( pWal ){ + ret = pWal->iCallback; + pWal->iCallback = 0; } return (int)ret; } diff --git a/src/wal.h b/src/wal.h index f04344c63a..4bab929f01 100644 --- a/src/wal.h +++ b/src/wal.h @@ -22,11 +22,11 @@ /* Connection to a write-ahead log (WAL) file. ** There is one object of this type for each pager. */ -typedef struct Log Log; +typedef struct Wal Wal; /* Open and close a connection to a write-ahead log. */ -int sqlite3WalOpen(sqlite3_vfs*, const char *zDb, Log **ppLog); -int sqlite3WalClose(Log *pLog, sqlite3_file *pFd, int sync_flags, u8 *zBuf); +int sqlite3WalOpen(sqlite3_vfs*, const char *zDb, Wal **ppWal); +int sqlite3WalClose(Wal *pWal, sqlite3_file *pFd, int sync_flags, u8 *zBuf); /* Used by readers to open (lock) and close (unlock) a snapshot. A ** snapshot is like a read-transaction. It is the state of the database @@ -35,39 +35,39 @@ int sqlite3WalClose(Log *pLog, sqlite3_file *pFd, int sync_flags, u8 *zBuf); ** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the ** transaction and releases the lock. */ -int sqlite3WalOpenSnapshot(Log *pLog, int *); -void sqlite3WalCloseSnapshot(Log *pLog); +int sqlite3WalOpenSnapshot(Wal *pWal, int *); +void sqlite3WalCloseSnapshot(Wal *pWal); /* Read a page from the write-ahead log, if it is present. 
*/ -int sqlite3WalRead(Log *pLog, Pgno pgno, int *pInLog, u8 *pOut); +int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, u8 *pOut); /* Return the size of the database as it existed at the beginning ** of the snapshot */ -void sqlite3WalDbsize(Log *pLog, Pgno *pPgno); +void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno); /* Obtain or release the WRITER lock. */ -int sqlite3WalWriteLock(Log *pLog, int op); +int sqlite3WalWriteLock(Wal *pWal, int op); /* Undo any frames written (but not committed) to the log */ -int sqlite3WalUndo(Log *pLog, int (*xUndo)(void *, Pgno), void *pUndoCtx); +int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); /* Return an integer that records the current (uncommitted) write ** position in the WAL */ -u32 sqlite3WalSavepoint(Log *pLog); +u32 sqlite3WalSavepoint(Wal *pWal); /* Move the write position of the WAL back to iFrame. Called in ** response to a ROLLBACK TO command. */ -int sqlite3WalSavepointUndo(Log *pLog, u32 iFrame); +int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame); /* Return true if data has been written but not committed to the log file. */ -int sqlite3WalDirty(Log *pLog); +int sqlite3WalDirty(Wal *pWal); /* Write a frame or frames to the log. */ -int sqlite3WalFrames(Log *pLog, int, PgHdr *, Pgno, int, int); +int sqlite3WalFrames(Wal *pWal, int, PgHdr *, Pgno, int, int); /* Copy pages from the log to the database file */ int sqlite3WalCheckpoint( - Log *pLog, /* Log connection */ + Wal *pWal, /* Write-ahead log connection */ sqlite3_file *pFd, /* File descriptor open on db file */ int sync_flags, /* Flags to sync db file with (or 0) */ u8 *zBuf, /* Temporary buffer to use */ @@ -80,6 +80,6 @@ int sqlite3WalCheckpoint( ** sqlite3WalCallback() was called. If no commits have occurred since ** the last call, then return 0. */ -int sqlite3WalCallback(Log *pLog); +int sqlite3WalCallback(Wal *pWal); #endif /* _WAL_H_ */