Allow writers to write dirty pages to the log mid-transaction in order to free memory.

FossilOrigin-Name: ecd828f96909895535d7dc744e5a8530e234e04d
This commit is contained in:
dan 2010-04-15 16:45:34 +00:00
parent bb2e9c97fc
commit 4cc6fb6165
4 changed files with 170 additions and 76 deletions

View File

@ -1,5 +1,5 @@
C Merge\stwo\sleaves\son\sthe\sWAL\sbranch.
D 2010-04-15T13:33:18
C Allow\swriters\sto\swrite\sdirty\spages\sto\sthe\slog\smid-transaction\sin\sorder\sto\sfree\smemory.
D 2010-04-15T16:45:35
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -154,7 +154,7 @@ F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30
F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f
F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1
F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053
F src/pager.c 9e9ee38c923fd225d73127751b7959bd826d0686
F src/pager.c 80054194c18631342f6b8253e0bb090020c0cd55
F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf
F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e
F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf
@ -757,7 +757,7 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61
F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5
F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8
F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d
F test/wal.test 8f480128b140e54253684bc395f7af0254dc4e03
F test/wal.test b0b6c02bbb0b2a647b4edb69325d4c59ac73259b
F test/walcrash.test 45cfbab30bb7cbe0b2e9d5cabe90dbcad10cb89b
F test/walslow.test 38076d5fad49e3678027be0f8110e6a32d531dc2
F test/walthread.test 58cd64b06f186251f09f64e4918fb74a7e52c963
@ -804,7 +804,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 84955c2e9ce526c5a3ed479aa09f093a7e37c7d0 33b1f584ef712625c4df8e2aefe895fa89f6a795
R bcaf3c70b81f92c4b906909c026342d3
P c9ed66cc3994b3b0d67a6c950b552a1a869f2ed2
R 0dae17f227f2dd17e65e49bd0f5ce0a8
U dan
Z 3782bee1cb14af26018fee50d035e5d0
Z b4af52effa56ad5510bfc4469b8a847d

View File

@ -1 +1 @@
c9ed66cc3994b3b0d67a6c950b552a1a869f2ed2
ecd828f96909895535d7dc744e5a8530e234e04d

View File

@ -3253,76 +3253,80 @@ static int pagerStress(void *p, PgHdr *pPg){
assert( pPg->pPager==pPager );
assert( pPg->flags&PGHDR_DIRTY );
if( pagerUseLog(pPager) ) return SQLITE_OK;
/* The doNotSync flag is set by the sqlite3PagerWrite() function while it
** is journalling a set of two or more database pages that are stored
** on the same disk sector. Syncing the journal is not allowed while
** this is happening as it is important that all members of such a
** set of pages are synced to disk together. So, if the page this function
** is trying to make clean will require a journal sync and the doNotSync
** flag is set, return without doing anything. The pcache layer will
** just have to go ahead and allocate a new page buffer instead of
** reusing pPg.
**
** Similarly, if the pager has already entered the error state, do not
** try to write the contents of pPg to disk.
*/
if( NEVER(pPager->errCode)
|| (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
){
return SQLITE_OK;
}
/* Sync the journal file if required. */
if( pPg->flags&PGHDR_NEED_SYNC ){
rc = syncJournal(pPager);
if( rc==SQLITE_OK && pPager->fullSync &&
!(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
!(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
if( pagerUseLog(pPager) ){
/* Write a single frame for this page to the log. */
assert( pPg->pDirty==0 );
rc = sqlite3LogFrames(pPager->pLog, pPager->pageSize, pPg, 0, 0, 0);
}else{
/* The doNotSync flag is set by the sqlite3PagerWrite() function while it
** is journalling a set of two or more database pages that are stored
** on the same disk sector. Syncing the journal is not allowed while
** this is happening as it is important that all members of such a
** set of pages are synced to disk together. So, if the page this function
** is trying to make clean will require a journal sync and the doNotSync
** flag is set, return without doing anything. The pcache layer will
** just have to go ahead and allocate a new page buffer instead of
** reusing pPg.
**
** Similarly, if the pager has already entered the error state, do not
** try to write the contents of pPg to disk.
*/
if( NEVER(pPager->errCode)
|| (pPager->doNotSync && pPg->flags&PGHDR_NEED_SYNC)
){
pPager->nRec = 0;
rc = writeJournalHdr(pPager);
return SQLITE_OK;
}
/* Sync the journal file if required. */
if( pPg->flags&PGHDR_NEED_SYNC ){
rc = syncJournal(pPager);
if( rc==SQLITE_OK && pPager->fullSync &&
!(pPager->journalMode==PAGER_JOURNALMODE_MEMORY) &&
!(sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND)
){
pPager->nRec = 0;
rc = writeJournalHdr(pPager);
}
}
/* If the page number of this page is larger than the current size of
** the database image, it may need to be written to the sub-journal.
** This is because the call to pager_write_pagelist() below will not
** actually write data to the file in this case.
**
** Consider the following sequence of events:
**
** BEGIN;
** <journal page X>
** <modify page X>
** SAVEPOINT sp;
** <shrink database file to Y pages>
** pagerStress(page X)
** ROLLBACK TO sp;
**
** If (X>Y), then when pagerStress is called page X will not be written
** out to the database file, but will be dropped from the cache. Then,
** following the "ROLLBACK TO sp" statement, reading page X will read
** data from the database file. This will be the copy of page X as it
** was when the transaction started, not as it was when "SAVEPOINT sp"
** was executed.
**
** The solution is to write the current data for page X into the
** sub-journal file now (if it is not already there), so that it will
** be restored to its current value when the "ROLLBACK TO sp" is
** executed.
*/
if( NEVER(
rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
) ){
rc = subjournalPage(pPg);
}
/* Write the contents of the page out to the database file. */
if( rc==SQLITE_OK ){
pPg->pDirty = 0;
rc = pager_write_pagelist(pPg);
}
}
/* If the page number of this page is larger than the current size of
** the database image, it may need to be written to the sub-journal.
** This is because the call to pager_write_pagelist() below will not
** actually write data to the file in this case.
**
** Consider the following sequence of events:
**
** BEGIN;
** <journal page X>
** <modify page X>
** SAVEPOINT sp;
** <shrink database file to Y pages>
** pagerStress(page X)
** ROLLBACK TO sp;
**
** If (X>Y), then when pagerStress is called page X will not be written
** out to the database file, but will be dropped from the cache. Then,
** following the "ROLLBACK TO sp" statement, reading page X will read
** data from the database file. This will be the copy of page X as it
** was when the transaction started, not as it was when "SAVEPOINT sp"
** was executed.
**
** The solution is to write the current data for page X into the
** sub-journal file now (if it is not already there), so that it will
** be restored to its current value when the "ROLLBACK TO sp" is
** executed.
*/
if( NEVER(
rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
) ){
rc = subjournalPage(pPg);
}
/* Write the contents of the page out to the database file. */
if( rc==SQLITE_OK ){
pPg->pDirty = 0;
rc = pager_write_pagelist(pPg);
}
/* Mark the page as clean. */

View File

@ -531,5 +531,95 @@ foreach code [list {
catch { close $::code3_chan }
}
#-------------------------------------------------------------------------
# This block of tests, wal-11.*, tests that nothing goes terribly wrong
# if frames must be written to the log file before a transaction is
# committed (in order to free up memory).
#
# wal-11.1: Start from a fresh connection (reopen_db), limit the page cache
# to 10 pages, select a 1024-byte page size and create table t1.
#
# The result is a two-element list: the size of test.db integer-divided by
# 1024 (the page size set here) and the size of test.db-wal integer-divided
# by 1044. NOTE(review): 1044 is presumably the page size plus a fixed
# per-frame overhead in the log file - confirm against the log format.
# Expected: 0 units in the database file and 3 units in the log file.
do_test wal-11.1 {
reopen_db
execsql {
PRAGMA cache_size = 10;
PRAGMA page_size = 1024;
CREATE TABLE t1(x PRIMARY KEY);
}
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {0 3}
# wal-11.2: Run "PRAGMA checkpoint" and measure both files again.
# Expected: the database file is now 3 units of 1024 bytes, while the log
# file is still 3 units of 1044 bytes (its size is not reduced here).
do_test wal-11.2 {
execsql { PRAGMA checkpoint }
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 3}
# wal-11.3: Insert a single 900-byte random blob outside of any explicit
# transaction.
# Expected: the database file is unchanged (3 units) and the log file has
# grown by one unit, from 3 to 4.
do_test wal-11.3 {
execsql { INSERT INTO t1 VALUES( randomblob(900) ) }
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 4}
# wal-11.4: Open a transaction with BEGIN and double the number of rows in
# t1 four times; the trailing SQL comments give the row count after each
# statement (2, 4, 8, 16). The transaction is deliberately left open - no
# COMMIT is issued in this test.
# Expected: the database file is unchanged (3 units) while the log file has
# already grown from 4 to 33 units, even though nothing has been committed.
do_test wal-11.4 {
execsql {
BEGIN;
INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 2
INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 4
INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 8
INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 16
}
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 33}
# wal-11.5: Still inside the transaction opened by wal-11.4, check that the
# writer sees all 16 rows and that the integrity check passes.
do_test wal-11.5 {
execsql {
SELECT count(*) FROM t1;
PRAGMA integrity_check;
}
} {16 ok}
# wal-11.6: Commit the transaction opened by wal-11.4.
# Expected: the database file is still 3 units; the log file grows from 33
# to 42 units as a result of the COMMIT.
do_test wal-11.6 {
execsql COMMIT
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {3 42}
# wal-11.7: Repeat the checks of wal-11.5 after the commit: 16 rows must be
# visible and the integrity check must pass.
do_test wal-11.7 {
execsql {
SELECT count(*) FROM t1;
PRAGMA integrity_check;
}
} {16 ok}
# wal-11.8: Run "PRAGMA checkpoint" after the large transaction.
# Expected: the database file grows from 3 to 37 units of 1024 bytes; the
# log file remains at 42 units.
do_test wal-11.8 {
execsql { PRAGMA checkpoint }
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 42}
# wal-11.9: Close the connection and open a new one with sqlite3_wal.
# Expected: the database file is still 37 units and the log file is now
# smaller than one 1044-byte unit (the integer division yields 0).
do_test wal-11.9 {
db close
sqlite3_wal db test.db
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 0}
# wal-11.10: On the new connection, set the cache size to 10 pages again,
# open a transaction and double the row count to 32. The transaction is
# left open. The SELECT result is discarded; only the file sizes are
# compared.
# Expected: the database file is unchanged (37 units) and the log file has
# grown to 38 units before any commit.
do_test wal-11.10 {
execsql {
PRAGMA cache_size = 10;
BEGIN;
INSERT INTO t1 SELECT randomblob(900) FROM t1; -- 32
SELECT count(*) FROM t1;
}
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 38}
# wal-11.11: Count the rows inside the open transaction (32), roll the
# transaction back, then count again. The second count must be back to the
# 16 rows that existed before wal-11.10.
do_test wal-11.11 {
execsql {
SELECT count(*) FROM t1;
ROLLBACK;
SELECT count(*) FROM t1;
}
} {32 16}
# wal-11.12: The rollback must not change either file size: the values are
# the same as those measured in wal-11.10.
do_test wal-11.12 {
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 38}
# wal-11.13: After the rollback, insert one more row and verify that the
# table holds 17 rows and that the integrity check passes.
do_test wal-11.13 {
execsql {
INSERT INTO t1 VALUES( randomblob(900) );
SELECT count(*) FROM t1;
PRAGMA integrity_check;
}
} {17 ok}
# wal-11.14: Measure the files after the insert in wal-11.13.
# Expected: both sizes are the same as before the insert (37 and 38 units).
# NOTE(review): presumably the new frames reuse the space left in the log
# by the rolled-back transaction - confirm against the log implementation.
do_test wal-11.14 {
list [expr [file size test.db]/1024] [expr [file size test.db-wal]/1044]
} {37 38}
finish_test