Convert the wal-header and frame-header to 24 bytes. Extra information in
both headers is designed to enhance robustness after crashes, though the
extra information is currently unused.  This is a snapshot of a work in
progress.

FossilOrigin-Name: 669706431f186f92fdc0856a6206419a1e843f46
This commit is contained in:
drh 2010-05-20 16:45:58 +00:00
parent 7c767e1113
commit 23ea97b641
6 changed files with 104 additions and 65 deletions

View File

@ -1,5 +1,8 @@
C Merge\sWIN32\sWAL\ssupport\sinto\strunk.\s\sStill\ssome\sissues\swith\slocking\sto\sresolve.
D 2010-05-19T23:41:55
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Convert\sthe\swal-header\sand\sframe-header\sto\s24\sbytes.\s\sExtra\sinformation\sin\nboth\sheaders\sis\sdesigned\sto\senhance\srobustness\safter\scrashes,\sthough\sthe\nextra\sinformation\sis\scurrently\sunused.\s\sThis\sis\sa\ssnapshot\sof\sa\swork\sin\nprogress.
D 2010-05-20T16:45:59
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -224,7 +227,7 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e
F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1
F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2
F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda
F src/wal.c b77f0682a9345c4f4472539ba72829e709c5d8d6
F src/wal.c 25969e598b3ce8748295801826cda538232d9200
F src/wal.h 434f76f51225bb614e43ccb6bd2341541ba6a06e
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 75fee9e255b62f773fcadd1d1f25b6f63ac7a356
@ -761,12 +764,12 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61
F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5
F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8
F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d
F test/wal.test 4724adbf440df45e31c572d9b740508a6f070da7
F test/wal.test 1ea87f3bc6c597ea6ca10e9f5f819c0e6c0ce2f8
F test/wal2.test 03059bc4d450c37f4b53278ddc3e2c7d53ac2d3f
F test/walbak.test a0e45187c7d8928df035dfea29b99b016b21ca3c
F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432
F test/walcrash.test f6d5fb2bb108876f04848720a488065d9deef69f
F test/walfault.test 98df47444944a6db2161eed5cef71d6c00bcb8c3
F test/walhook.test c3314e30baf074dd087d52d21ea6f1d09138b0a0
F test/walhook.test 67e675127f4acb72f061a12667ce6e5460b06b78
F test/walmode.test 6ca9d710cc9f6545b913abcded6d6b0b15641048
F test/walslow.test d21625e2e99e11c032ce949e8a94661576548933
F test/walthread.test 1a8c55cd9e3272ba0caa028d8f0ead04989a3378
@ -813,7 +816,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 9894118d20c08951565c2096552d4a3d2413f7b0 4b69f2cd315b6b66d10e5190d235114788853258
R 22474bd221ea73384053085c0e088ad7
U shaneh
Z 2a837cc3f84e5ea75aa515c136d3a5d4
P 43377663fc3569c361867cdea19e8abaf91a163f
R 47b0032c37d93d195eb69ddfe3dd9d8c
U drh
Z bc14d892f085709c56913a5ad734eeb0
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFL9WdKoxKgR168RlERAjyUAJ9c2KtaS5H2pdO1uUyrfgizw26dywCffxrC
bcVcl+tQYJI9XMFw4UKZNxs=
=Bxmw
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
43377663fc3569c361867cdea19e8abaf91a163f
669706431f186f92fdc0856a6206419a1e843f46

107
src/wal.c
View File

@ -31,23 +31,28 @@
** used to determine which frames within the WAL are valid and which
** are leftovers from prior checkpoints.
**
** The WAL header is 12 bytes in size and consists of the following three
** The WAL header is 24 bytes in size and consists of the following six
** big-endian 32-bit unsigned integer values:
**
** 0: Database page size,
** 4: Randomly selected salt value 1,
** 8: Randomly selected salt value 2.
** 0: Magic number. 0x377f0682 (big endian)
** 4: File format version. Currently 3007000
** 8: Database page size. Example: 1024
** 12: Checkpoint sequence number
** 16: Salt-1, random integer that changes with each checkpoint
** 20: Salt-2, a different random integer changing with salt-1
**
** Immediately following the header are zero or more frames. Each
** frame consists of a 16-byte header followed by a <page-size> bytes
** of page data. The header is broken into 4 big-endian 32-bit unsigned
** Immediately following the wal-header are zero or more frames. Each
** frame consists of a 24-byte frame-header followed by <page-size> bytes
** of page data. The frame-header is broken into 6 big-endian 32-bit unsigned
** integer values, as follows:
**
** 0: Page number.
** 4: For commit records, the size of the database image in pages
** after the commit. For all other records, zero.
** 8: Checksum value 1.
** 12: Checksum value 2.
** 8: Checkpoint sequence number (copied from the header)
** 12: Salt-1 (copied from the header)
** 16: Checksum-1.
** 20: Checksum-2.
**
** READER ALGORITHM
**
@ -180,7 +185,9 @@ typedef struct WalIterator WalIterator;
/*
** The following object stores a copy of the wal-index header.
** The following object stores information from the wal-index header.
**
** This object is *not* a copy of the wal-index header.
**
** Member variables iCheck1 and iCheck2 contain the checksum for the
** last frame written to the wal, or 2 and 3 respectively if the log
@ -207,10 +214,10 @@ struct WalIndexHdr {
#define WALINDEX_LOCK_RESERVED 8
/* Size of header before each frame in wal */
#define WAL_FRAME_HDRSIZE 16
#define WAL_FRAME_HDRSIZE 24
/* Size of write ahead log header */
#define WAL_HDRSIZE 12
#define WAL_HDRSIZE 24
/*
** Return the offset of frame iFrame in the write-ahead log file,
@ -238,6 +245,8 @@ struct Wal {
u8 isWindexOpen; /* True if ShmOpen() called on pDbFd */
WalIndexHdr hdr; /* Wal-index for current snapshot */
char *zWalName; /* Name of WAL file */
u32 nCkpt; /* Checkpoint sequence number */
u32 iSalt1, iSalt2; /* Two random salt values */
};
@ -332,13 +341,16 @@ static void walIndexWriteHdr(Wal *pWal, WalIndexHdr *pHdr){
** supplied by the caller. A frame-header is made up of a series of
** 4-byte big-endian integers, as follows:
**
** 0: Database page size in bytes.
** 4: Page number.
** 8: New database size (for commit frames, otherwise zero).
** 12: Frame checksum 1.
** 16: Frame checksum 2.
** 0: Page number.
** 4: For commit records, the size of the database image in pages
** after the commit. For all other records, zero.
** 8: Checkpoint sequence number (copied from the header)
** 12: Salt-1 (copied from the header)
** 16: Checksum-1.
** 20: Checksum-2.
*/
static void walEncodeFrame(
Wal *pWal, /* The write-ahead log */
u32 *aCksum, /* IN/OUT: Checksum values */
u32 iPage, /* Database page number for frame */
u32 nTruncate, /* New db size (or 0 for non-commit frames) */
@ -346,16 +358,18 @@ static void walEncodeFrame(
u8 *aData, /* Pointer to page data (for checksum) */
u8 *aFrame /* OUT: Write encoded frame here */
){
assert( WAL_FRAME_HDRSIZE==16 );
assert( WAL_FRAME_HDRSIZE==24 );
sqlite3Put4byte(&aFrame[0], iPage);
sqlite3Put4byte(&aFrame[4], nTruncate);
sqlite3Put4byte(&aFrame[8], pWal->nCkpt);
sqlite3Put4byte(&aFrame[12], pWal->iSalt1);
walChecksumBytes(aFrame, 8, aCksum);
walChecksumBytes(aData, nData, aCksum);
sqlite3Put4byte(&aFrame[8], aCksum[0]);
sqlite3Put4byte(&aFrame[12], aCksum[1]);
sqlite3Put4byte(&aFrame[16], aCksum[0]);
sqlite3Put4byte(&aFrame[20], aCksum[1]);
}
/*
@ -363,6 +377,7 @@ static void walEncodeFrame(
** frame checksum looks Ok. Otherwise return 0.
*/
static int walDecodeFrame(
Wal *pWal, /* The write-ahead log */
u32 *aCksum, /* IN/OUT: Checksum values */
u32 *piPage, /* OUT: Database page number for frame */
u32 *pnTruncate, /* OUT: New db size (or 0 if not commit) */
@ -370,13 +385,21 @@ static int walDecodeFrame(
u8 *aData, /* Pointer to page data (for checksum) */
u8 *aFrame /* Frame data */
){
assert( WAL_FRAME_HDRSIZE==16 );
assert( WAL_FRAME_HDRSIZE==24 );
#if 0
if( pWal->nCkpt!=sqlite3Get4byte(&aFrame[8]) ){
return 0;
}
if( pWal->iSalt1!=sqlite3Get4byte(&aFrame[12]) ){
return 0;
}
#endif
walChecksumBytes(aFrame, 8, aCksum);
walChecksumBytes(aData, nData, aCksum);
if( aCksum[0]!=sqlite3Get4byte(&aFrame[8])
|| aCksum[1]!=sqlite3Get4byte(&aFrame[12])
if( aCksum[0]!=sqlite3Get4byte(&aFrame[16])
|| aCksum[1]!=sqlite3Get4byte(&aFrame[20])
){
/* Checksum failed. */
return 0;
@ -618,7 +641,7 @@ static int walIndexRecover(Wal *pWal){
}
if( nSize>WAL_FRAME_HDRSIZE ){
u8 aBuf[WAL_FRAME_HDRSIZE]; /* Buffer to load first frame header into */
u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */
u8 *aFrame = 0; /* Malloc'd buffer to load entire frame */
int szFrame; /* Number of bytes in buffer aFrame[] */
u8 *aData; /* Pointer to data part of aFrame buffer */
@ -638,12 +661,13 @@ static int walIndexRecover(Wal *pWal){
/* If the database page size is not a power of two, or is greater than
** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid data.
*/
szPage = sqlite3Get4byte(&aBuf[0]);
szPage = sqlite3Get4byte(&aBuf[8]);
if( szPage&(szPage-1) || szPage>SQLITE_MAX_PAGE_SIZE || szPage<512 ){
goto finished;
}
aCksum[0] = sqlite3Get4byte(&aBuf[4]);
aCksum[1] = sqlite3Get4byte(&aBuf[8]);
pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
aCksum[0] = sqlite3Get4byte(&aBuf[16]);
aCksum[1] = sqlite3Get4byte(&aBuf[20]);
/* Malloc a buffer to read frames into. */
szFrame = szPage + WAL_FRAME_HDRSIZE;
@ -663,7 +687,8 @@ static int walIndexRecover(Wal *pWal){
/* Read and decode the next log frame. */
rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
if( rc!=SQLITE_OK ) break;
isValid = walDecodeFrame(aCksum, &pgno, &nTruncate, szPage, aData, aFrame);
isValid = walDecodeFrame(pWal, aCksum, &pgno, &nTruncate, szPage,
aData, aFrame);
if( !isValid ) break;
rc = walIndexAppend(pWal, ++iFrame, pgno);
if( rc!=SQLITE_OK ) break;
@ -1005,6 +1030,7 @@ static int walCheckpoint(
pWal->hdr.iCheck1 = 2;
pWal->hdr.iCheck2 = 3;
walIndexWriteHdr(pWal, &pWal->hdr);
pWal->nCkpt++;
/* TODO: If a crash occurs and the current log is copied into the
** database there is no problem. However, if a crash occurs while
@ -1466,7 +1492,7 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 iFrame){
pWal->hdr.mxFrame = iFrame;
if( iFrame>0 ){
i64 iOffset = walFrameOffset(iFrame, pWal->hdr.szPage) + sizeof(u32)*2;
i64 iOffset = walFrameOffset(iFrame, pWal->hdr.szPage) + sizeof(u32)*4;
rc = sqlite3OsRead(pWal->pWalFd, aCksum, sizeof(aCksum), iOffset);
pWal->hdr.iCheck1 = sqlite3Get4byte(&aCksum[0]);
pWal->hdr.iCheck2 = sqlite3Get4byte(&aCksum[4]);
@ -1495,7 +1521,6 @@ int sqlite3WalFrames(
PgHdr *pLast = 0; /* Last frame in list */
int nLast = 0; /* Number of extra copies of last page */
assert( WAL_FRAME_HDRSIZE==(4 * 2 + 2*sizeof(u32)) );
assert( pList );
assert( pWal->lockState==SQLITE_SHM_WRITE );
assert( pWal->pWiData==0 );
@ -1504,14 +1529,17 @@ int sqlite3WalFrames(
** header to the start of the WAL file. See comments at the top of
** this source file for a description of the WAL header format.
*/
assert( WAL_FRAME_HDRSIZE>=WAL_HDRSIZE );
iFrame = pWal->hdr.mxFrame;
if( iFrame==0 ){
sqlite3Put4byte(aFrame, szPage);
sqlite3_randomness(8, &aFrame[4]);
pWal->hdr.iCheck1 = sqlite3Get4byte(&aFrame[4]);
pWal->hdr.iCheck2 = sqlite3Get4byte(&aFrame[8]);
rc = sqlite3OsWrite(pWal->pWalFd, aFrame, WAL_HDRSIZE, 0);
u8 aWalHdr[WAL_HDRSIZE]; /* Buffer to assemble wal-header in */
sqlite3Put4byte(&aWalHdr[0], 0x377f0682);
sqlite3Put4byte(&aWalHdr[4], 3007000);
sqlite3Put4byte(&aWalHdr[8], szPage);
sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
sqlite3_randomness(8, &aWalHdr[16]);
pWal->hdr.iCheck1 = pWal->iSalt1 = sqlite3Get4byte(&aWalHdr[16]);
pWal->hdr.iCheck2 = pWal->iSalt2 = sqlite3Get4byte(&aWalHdr[20]);
rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
if( rc!=SQLITE_OK ){
return rc;
}
@ -1529,7 +1557,7 @@ int sqlite3WalFrames(
/* Populate and write the frame header */
nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
walEncodeFrame(aCksum, p->pgno, nDbsize, szPage, p->pData, aFrame);
walEncodeFrame(pWal, aCksum, p->pgno, nDbsize, szPage, p->pData, aFrame);
rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
if( rc!=SQLITE_OK ){
return rc;
@ -1553,7 +1581,8 @@ int sqlite3WalFrames(
iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
while( iOffset<iSegment ){
walEncodeFrame(aCksum,pLast->pgno,nTruncate,szPage,pLast->pData,aFrame);
walEncodeFrame(pWal, aCksum, pLast->pgno, nTruncate, szPage,
pLast->pData, aFrame);
rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
if( rc!=SQLITE_OK ){
return rc;

View File

@ -41,7 +41,7 @@ proc sqlite3_wal {args} {
}
proc log_file_size {nFrame pgsz} {
expr {12 + ($pgsz+16)*$nFrame}
expr {24 + ($pgsz+24)*$nFrame}
}
proc log_deleted {logfile} {
@ -1160,7 +1160,7 @@ foreach {tn ckpt_cmd ckpt_res ckpt_main ckpt_aux} {
# a transaction that modifies 171 pages. In synchronous=NORMAL mode, this
# produces a log file of:
#
# 12 + (16+512)*171 = 90300 bytes.
# 24 + (24+512)*171 = 91680 bytes.
#
# Slightly larger than 11*8192 = 90112 bytes.
#
@ -1173,13 +1173,13 @@ foreach {tn ckpt_cmd ckpt_res ckpt_main ckpt_aux} {
set old_pending_byte [sqlite3_test_control_pending_byte 0x10000000]
catch { db close }
foreach {tn sectorsize logsize} {
1 128 90828
2 256 90828
3 512 90828
4 1024 91356
5 2048 92412
6 4096 94524
7 8192 98748
1 128 92216
2 256 92216
3 512 92216
4 1024 92216
5 2048 92216
6 4096 94360
7 8192 98648
} {
file delete -force test.db test.db-wal test.db-journal
sqlite3_simulate_device -sectorsize $sectorsize
@ -1263,7 +1263,7 @@ foreach {nFrame result} {
file copy -force testX.db test.db
file copy -force testX.db-wal test.db-wal
hexio_write test.db-wal [expr 12 + $nFrame*(16+1024) + 12] 00000000
hexio_write test.db-wal [expr 24 + $nFrame*(24+1024) + 20] 00000000
sqlite3 db test.db
execsql {
@ -1338,12 +1338,13 @@ foreach {tn pgsz works} {
#
set c1 22
set c2 23
set walhdr [binary format III $pgsz $c1 $c2]
set walhdr [binary format IIIIII 931071618 3007000 $pgsz 1234 $c1 $c2]
set salt1 $c1
set framebody [randomblob $pgsz]
set framehdr [binary format II $pg 5]
logcksum c1 c2 $framehdr
logcksum c1 c2 $framebody
set framehdr [binary format IIII $pg 5 $c1 $c2]
set framehdr [binary format IIIIII $pg 5 1234 $salt1 $c1 $c2]
set fd [open test.db-wal w]
fconfigure $fd -encoding binary -translation binary
puts -nonewline $fd $walhdr

View File

@ -19,7 +19,7 @@ source $testdir/tester.tcl
ifcapable !wal {finish_test ; return }
proc log_file_size {nFrame pgsz} {
expr {12 + ($pgsz+16)*$nFrame}
expr {24 + ($pgsz+24)*$nFrame}
}
# Test organization:
@ -183,4 +183,3 @@ do_test walbak-2.12 {
db2 close
finish_test

View File

@ -23,7 +23,7 @@ source $testdir/tester.tcl
ifcapable !wal {finish_test ; return }
proc log_file_size {nFrame pgsz} {
expr {12 + ($pgsz+16)*$nFrame}
expr {24 + ($pgsz+24)*$nFrame}
}
set ::wal_hook [list]