If a writer exits unexpectedly in the middle of a transaction, have the following writer remove any wal-index hash-table entries left by the interrupted transaction.

FossilOrigin-Name: ed77556adcdf7011b95b9969b360269fb2ebe4e5
This commit is contained in:
dan 2010-05-25 10:50:56 +00:00
parent 82043b3077
commit ca6b5baba4
4 changed files with 179 additions and 66 deletions

View File

@ -1,8 +1,5 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Remove\sunreachable\scode\sassociated\swith\sWAL\sfrom\sthe\spager.
D 2010-05-25T02:24:01
C If\sa\swriter\sexits\sunexpectedly\sin\sthe\smiddle\sof\sa\stransaction,\shave\sthe\sfollowing\swriter\sremove\sany\swal-index\shash-table\sentries\sleft\sby\sthe\sinterrupted\stransaction.
D 2010-05-25T10:50:57
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -227,7 +224,7 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e
F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1
F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2
F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda
F src/wal.c c09f4e33aad4ec3822ef3e9626f8bd7c273542af
F src/wal.c d75a06a34fbe9106a839f2c1d4d775d2f37f4a97
F src/wal.h 111c6f3efd83fe2fc707b29e26431e8eff4c6f28
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 75fee9e255b62f773fcadd1d1f25b6f63ac7a356
@ -769,6 +766,7 @@ F test/wal2.test a10f52b403117c2b9a1f7a11db527c53bb684a25
F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432
F test/walcksum.test a0712107b6a73397fc7e3f92d5b16e206caa7d3d
F test/walcrash.test f6d5fb2bb108876f04848720a488065d9deef69f
F test/walcrash2.test 22a3a71a83153c306f1fa2b51f98588541d5b842
F test/walfault.test f71d4c9a13d4e27086aef55f1e0e94734ffa2f6a
F test/walhook.test 67e675127f4acb72f061a12667ce6e5460b06b78
F test/walmode.test 6ca9d710cc9f6545b913abcded6d6b0b15641048
@ -817,14 +815,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 3d252ce5d0d843e4e65beed672598e65c5745129
R ac14f5ddc7bfde994fc5d9b9840c3b63
U drh
Z cf8997a16115d390b555f7bd6076c97d
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFL+zTEoxKgR168RlERAggGAJwON3XhIyWJUiH+/N6YqZBaIRwrXQCbBrgM
la8oM5lf1RPWOhEYr/CldiE=
=tulc
-----END PGP SIGNATURE-----
P 54c1718e6d15a20414cae15895eb5e83217722e2
R 5c5f25e7e1cef98008c39244dc94b13e
U dan
Z b5e4f418b379e4c3437beb0f2d82e4a1

View File

@ -1 +1 @@
54c1718e6d15a20414cae15895eb5e83217722e2
ed77556adcdf7011b95b9969b360269fb2ebe4e5

118
src/wal.c
View File

@ -660,6 +660,60 @@ static void walHashFind(
*piZero = iZero;
}
/*
** Remove entries from the hash table that point to WAL slots greater
** than pWal->hdr.mxFrame.
**
** This function is called whenever pWal->hdr.mxFrame is decreased due
** to a rollback or savepoint.
**
** At most only the very last hash table needs to be updated. Any
** later hash tables will be automatically cleared when pWal->hdr.mxFrame
** advances to the point where those hash tables are actually needed.
*/
static void walCleanupHash(Wal *pWal){
volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */
volatile u32 *aPgno; /* Unused return from walHashFind() */
u32 iZero; /* frame == (aHash[x]+iZero) */
int iLimit; /* Zero values greater than this */
assert( pWal->lockState==SQLITE_SHM_WRITE );
walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
iLimit = pWal->hdr.mxFrame - iZero;
if( iLimit>0 ){
int nByte; /* Number of bytes to zero in aPgno[] */
int i; /* Used to iterate through aHash[] */
for(i=0; i<HASHTABLE_NSLOT; i++){
if( aHash[i]>iLimit ){
aHash[i] = 0;
}
}
/* Zero the entries in the aPgno array that correspond to frames with
** frame numbers greater than pWal->hdr.mxFrame.
*/
nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit);
memset((void *)&aPgno[iZero+iLimit+1], 0, nByte);
assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash );
}
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Verify that the every entry in the mapping region is still reachable
** via the hash table even after the cleanup.
*/
{
int i; /* Loop counter */
int iKey; /* Hash key */
for(i=1; i<=iLimit; i++){
for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){
if( aHash[iKey]==i ) break;
}
assert( aHash[iKey]==i );
}
}
#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
}
/*
** Set an entry in the wal-index that will map database page number
@ -692,8 +746,22 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
idx = iFrame - iZero;
if( idx==1 ) memset((void*)aHash, 0, HASHTABLE_NBYTE);
if( idx==1 ){
memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32));
memset((void*)aHash, 0, HASHTABLE_NBYTE);
}
assert( idx <= HASHTABLE_NSLOT/2 + 1 );
if( aPgno[iFrame] ){
/* If the entry in aPgno[] is already set, then the previous writer
** must have exited unexpectedly in the middle of a transaction (after
** writing one or more dirty pages to the WAL to free up memory).
** Remove the remnants of that writers uncommitted transaction from
** the hash-table before writing any new entries.
*/
walCleanupHash(pWal);
assert( !aPgno[iFrame] );
}
aPgno[iFrame] = iPage;
for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
assert( nCollide++ < idx );
@ -1552,52 +1620,6 @@ int sqlite3WalWriteLock(Wal *pWal, int op){
return rc;
}
/*
** Remove entries from the hash table that point to WAL slots greater
** than pWal->hdr.mxFrame.
**
** This function is called whenever pWal->hdr.mxFrame is decreased due
** to a rollback or savepoint.
**
** At most only the very last hash table needs to be updated. Any
** later hash tables will be automatically cleared when pWal->hdr.mxFrame
** advances to the point where those hash tables are actually needed.
*/
static void walCleanupHash(Wal *pWal){
volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */
volatile u32 *aPgno; /* Unused return from walHashFind() */
u32 iZero; /* frame == (aHash[x]+iZero) */
int iLimit; /* Zero values greater than this */
assert( pWal->lockState==SQLITE_SHM_WRITE );
walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
iLimit = pWal->hdr.mxFrame - iZero;
if( iLimit>0 ){
int i; /* Used to iterate through aHash[] */
for(i=0; i<HASHTABLE_NSLOT; i++){
if( aHash[i]>iLimit ){
aHash[i] = 0;
}
}
}
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Verify that the every entry in the mapping region is still reachable
** via the hash table even after the cleanup.
*/
{
int i; /* Loop counter */
int iKey; /* Hash key */
for(i=1; i<=iLimit; i++){
for(iKey=walHash(aPgno[i+iZero]); aHash[iKey]; iKey=walNextHash(iKey)){
if( aHash[iKey]==i ) break;
}
assert( aHash[iKey]==i );
}
}
#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
}
/*
** If any data has been written (but not committed) to the log file, this
** function moves the write-pointer back to the start of the transaction.
@ -1620,11 +1642,11 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
assert( pWal->pWiData==0 );
rc = walIndexReadHdr(pWal, &unused);
if( rc==SQLITE_OK ){
walCleanupHash(pWal);
for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){
assert( pWal->lockState==SQLITE_SHM_WRITE );
rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
}
walCleanupHash(pWal);
}
walIndexUnmap(pWal);
}

100
test/walcrash2.test Normal file
View File

@ -0,0 +1,100 @@
# 2010 May 25
#
# The author disclaims copyright to this source code. In place of
# a legal notice, here is a blessing:
#
# May you do good and not evil.
# May you find forgiveness for yourself and forgive others.
# May you share freely, never taking more than you give.
#
#***********************************************************************
#
set testdir [file dirname $argv0]
source $testdir/tester.tcl
source $testdir/lock_common.tcl
ifcapable !wal {finish_test ; return }
proc wal_file_size {nFrame pgsz} {
expr {24 + ($pgsz+24)*$nFrame}
}
#-------------------------------------------------------------------------
# This test case demonstrates a flaw in the wal-index manipulation that
# existed at one point: If a process crashes mid-transaction, it may have
# already added some entries to one of the hash-tables in the wal-index.
# If the transaction were to be explicitly rolled back at this point, the
# hash-table entries would be removed as part of the rollback. However,
# if the process crashes, the transaction is implicitly rolled back and
# the rogue entries remain in the hash table.
#
# Normally, this causes no problem - readers can tell the difference
# between committed and uncommitted entries in the hash table. However,
# if it happens often enough that all slots in the hash-table become
# non-zero, the next process that attempts to read or write the hash
# table falls into an infinite loop.
#
# Even if run with an SQLite version affected by the bug, this test case
# only goes into an infinite loop if SQLite is compiled without SQLITE_DEBUG
# defined. If SQLITE_DEBUG is defined, the program is halted by a failing
# assert() before entering the infinite loop.
#
# walcrash2-1.1: Create a database. Commit a transaction that adds 8 frames
# to the WAL (and 8 entry to the first hash-table in the
# wal-index).
#
# walcrash2-1.2: Have an external process open a transaction, add 8 entries
# to the wal-index hash-table, then crash. Repeat this 1023
# times (so that the wal-index contains 8192 entries - all
# slots are non-zero).
#
# walcrash2-1.3: Using a new database connection, attempt to query the
# database. This should cause the process to go into the
# infinite loop.
#
do_test walcrash2-1.1 {
execsql {
PRAGMA page_size = 1024;
PRAGMA journal_mode = WAL;
PRAGMA synchronous = NORMAL;
BEGIN;
CREATE TABLE t1(x);
CREATE TABLE t2(x);
CREATE TABLE t3(x);
CREATE TABLE t4(x);
CREATE TABLE t5(x);
CREATE TABLE t6(x);
CREATE TABLE t7(x);
COMMIT;
}
file size test.db-wal
} [wal_file_size 8 1024]
for {set nEntry 8} {$nEntry < 8192} {incr nEntry 8} {
do_test walcrash2-1.2.[expr $nEntry/8] {
set C [launch_testfixture]
testfixture $C {
sqlite3 db test.db
db eval {
PRAGMA cache_size = 15;
BEGIN;
INSERT INTO t1 VALUES(randomblob(900)); -- 1 row, 1 page
INSERT INTO t1 SELECT * FROM t1; -- 2 rows, 3 pages
INSERT INTO t1 SELECT * FROM t1; -- 4 rows, 5 pages
INSERT INTO t1 SELECT * FROM t1; -- 8 rows, 9 pages
INSERT INTO t1 SELECT * FROM t1; -- 16 rows, 17 pages
INSERT INTO t1 SELECT * FROM t1 LIMIT 3; -- 20 rows, 20 pages
}
}
close $C
file size test.db-wal
} [wal_file_size 16 1024]
}
do_test walcrash2-1.3 {
sqlite3 db2 test.db
execsql { SELECT count(*) FROM t1 } db2
} {0}
catch { db2 close }
finish_test