From 85bc6df2f197c3f5f040b8c17c567b1cf5b20e12 Mon Sep 17 00:00:00 2001 From: drh Date: Fri, 10 Nov 2017 20:00:50 +0000 Subject: [PATCH] Improved comments and variable names in the read-only WAL logic. FossilOrigin-Name: d3c25740eec9a2a41c29e6e488fcf6587c1fb821147a442c29439b25a92154a5 --- manifest | 14 +++--- manifest.uuid | 2 +- src/wal.c | 131 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 87 insertions(+), 60 deletions(-) diff --git a/manifest b/manifest index 453bed54e5..c2392bfdac 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Avoid\ssuperfluous\sSHM\sunlock\scall\sin\sthe\sWin32\sVFS. -D 2017-11-09T23:24:29.716 +C Improved\scomments\sand\svariable\snames\sin\sthe\sread-only\sWAL\slogic. +D 2017-11-10T20:00:50.239 F Makefile.in 5bae3f2f3d42f2ad52b141562d74872c97ac0fca6c54953c91bb150a0e6427a8 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 3a5cb477ec3ce5274663b693164e349db63348667cd45bad78cc13d580b691e2 @@ -543,7 +543,7 @@ F src/vdbesort.c 731a09e5cb9e96b70c394c1b7cf3860fbe84acca7682e178615eb941a3a0ef2 F src/vdbetrace.c 48e11ebe040c6b41d146abed2602e3d00d621d7ebe4eb29b0a0f1617fd3c2f6c F src/vtab.c 0e4885495172e1bdf54b12cce23b395ac74ef5729031f15e1bc1e3e6b360ed1a F src/vxworks.h d2988f4e5a61a4dfe82c6524dd3d6e4f2ce3cdb9 -F src/wal.c 6903d391b6c224e45910795d66f97151428662c5f459d3b69997209dbcadc6f5 +F src/wal.c 47f8a4493e077b312399b62fd36fd4e8b70535dcd8d9a5caf18b8be019fcf420 F src/wal.h 8de5d2d3de0956d6f6cb48c83a4012d5f227b8fe940f3a349a4b7e85ebcb492a F src/walker.c d591e8a9ccf60abb010966b354fcea4aa08eba4d83675c2b281a8764c76cc22f F src/where.c b7a075f5fb3d912a891dcc3257f538372bb4a1622dd8ca7d752ad95ce8949ba4 @@ -1669,7 +1669,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P a2908e2c88f7a30638a7e791fc7ad0325b663097c12cecd1f4726b0d60a9a3ed -R 9d6be609689f3743af6adbbc5cf401b2 -U mistachkin -Z 39f42e393c5183ce2049496b416ae681 +P 5a384be6979b783d1f3af2ac6307e7e731c415d052f9405f04c0216f59414633 +R 4efdec1d3561b29c7744e3cfcf4d5f7b +U drh +Z df395ed2f6beb277e98c44e2de48326a diff --git a/manifest.uuid b/manifest.uuid index ba222182ca..354097e8fa 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -5a384be6979b783d1f3af2ac6307e7e731c415d052f9405f04c0216f59414633 \ No newline at end of file +d3c25740eec9a2a41c29e6e488fcf6587c1fb821147a442c29439b25a92154a5 \ No newline at end of file diff --git a/src/wal.c b/src/wal.c index afd0e3f7b6..aff0e97fa2 100644 --- a/src/wal.c +++ b/src/wal.c @@ -455,7 +455,7 @@ struct Wal { u8 truncateOnCommit; /* True to truncate WAL file on commit */ u8 syncHeader; /* Fsync the WAL header if true */ u8 padToSectorBoundary; /* Pad transactions out to the next sector */ - u8 bUnlocked; + u8 bShmUnreliable; /* SHM content is read-only and unreliable */ WalIndexHdr hdr; /* Wal-index header for current transaction */ u32 minFrame; /* Ignore wal frames before this one */ u32 iReCksum; /* On commit, recalculate checksums from here */ @@ -1271,7 +1271,7 @@ recovery_error: ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ - if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bUnlocked ){ + if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE || pWal->bShmUnreliable ){ int i; for(i=0; inWiData; i++){ sqlite3_free((void *)pWal->apWiData[i]); @@ -2099,14 +2099,22 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ */ assert( pChanged ); rc = walIndexPage(pWal, 0, &page0); - if( rc==SQLITE_READONLY_CANTINIT ){ - assert( page0==0 && pWal->writeLock==0 ); - pWal->bUnlocked = 1; - pWal->exclusiveMode = WAL_HEAPMEMORY_MODE; - *pChanged = 1; - }else if( rc!=SQLITE_OK ){ - return rc; + assert( rc!=SQLITE_READONLY ); /* READONLY changed to OK in walIndexPage */ + if( rc==SQLITE_READONLY_CANTINIT ){ + /* The SQLITE_READONLY_CANTINIT return means that the shared-memory + ** was openable but is not writable, and this thread is unable to + ** confirm that another write-capable connection has the shared-memory + ** open, and hence the content of the shared-memory is unreliable, + ** since the shared-memory might be inconsistent with the WAL file + ** and there is no writer on hand to fix it. */ + assert( page0==0 && pWal->writeLock==0 ); + pWal->bShmUnreliable = 1; + pWal->exclusiveMode = WAL_HEAPMEMORY_MODE; + *pChanged = 1; + }else{ + return rc; /* Any other non-OK return is just an error */ + } }; assert( page0 || pWal->writeLock==0 ); @@ -2122,7 +2130,7 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ */ assert( badHdr==0 || pWal->writeLock==0 ); if( badHdr ){ - if( pWal->bUnlocked==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){ + if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){ if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){ walUnlockShared(pWal, WAL_WRITE_LOCK); rc = SQLITE_READONLY_RECOVERY; @@ -2152,10 +2160,10 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){ rc = SQLITE_CANTOPEN_BKPT; } - if( pWal->bUnlocked ){ + if( pWal->bShmUnreliable ){ if( rc!=SQLITE_OK ){ walIndexClose(pWal, 0); - pWal->bUnlocked = 0; + pWal->bShmUnreliable = 0; assert( pWal->nWiData>0 && pWal->apWiData[0]==0 ); if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY; } @@ -2166,11 +2174,21 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ } /* -** Open an "unlocked" transaction. An unlocked transaction is a read -** transaction used by a read-only client in cases where the *-shm -** file cannot be mapped and its contents cannot be trusted. It is -** assumed that the *-wal file has been read and that a wal-index -** constructed in heap memory is currently available in Wal.apWiData[]. +** Open a transaction in a connection where the shared-memory is read-only +** and where we cannot verify that there is a separate write-capable connection +** on hand to keep the shared-memory up-to-date with the WAL file. +** +** This can happen, for example, when the shared-memory is implemented by +** memory-mapping a *-shm file, where a prior writer has shut down and +** left the *-shm file on disk, and now the present connection is trying +** to use that database but lacks write permission on the *-shm file. +** Other scenarios are also possible, depending on the VFS implementation. +** +** Precondition: +** +** The *-wal file has been read and an appropriate wal-index has been +** constructed in pWal->apWiData[] using heap memory instead of shared +** memory. ** ** If this function returns SQLITE_OK, then the read transaction has ** been successfully opened. In this case output variable (*pChanged) @@ -2182,7 +2200,7 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ ** ** If an error occurs, an SQLite error code is returned. */ -static int walBeginUnlocked(Wal *pWal, int *pChanged){ +static int walBeginShmUnreliable(Wal *pWal, int *pChanged){ i64 szWal; /* Size of wal file on disk in bytes */ i64 iOffset; /* Current offset when reading wal file */ u8 aBuf[WAL_HDRSIZE]; /* Buffer to load WAL header into */ @@ -2193,50 +2211,59 @@ static int walBeginUnlocked(Wal *pWal, int *pChanged){ int rc; /* Return code */ u32 aSaveCksum[2]; /* Saved copy of pWal->hdr.aFrameCksum */ - assert( pWal->bUnlocked ); + assert( pWal->bShmUnreliable ); assert( pWal->readOnly & WAL_SHM_RDONLY ); assert( pWal->nWiData>0 && pWal->apWiData[0] ); /* Take WAL_READ_LOCK(0). This has the effect of preventing any - ** live clients from running a checkpoint, but does not stop them + ** writers from running a checkpoint, but does not stop them ** from running recovery. */ rc = walLockShared(pWal, WAL_READ_LOCK(0)); if( rc!=SQLITE_OK ){ if( rc==SQLITE_BUSY ) rc = WAL_RETRY; - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } pWal->readLock = 0; - /* Try to map the *-shm file again. If it succeeds this time, then - ** a non-readonly_shm connection has already connected to the database. - ** In this case, start over with opening the transaction. + /* Check to see if a separate writer has attached to the shared-memory area, + ** thus making the shared-memory "reliable" again. Do this by invoking + ** the xShmMap() routine of the VFS and looking to see if the return + ** is SQLITE_READONLY instead of SQLITE_READONLY_CANTINIT. ** - ** The *-shm file was opened read-only, so sqlite3OsShmMap() can never - ** return SQLITE_OK here, as that would imply that it had established - ** a read/write mapping. A return of SQLITE_READONLY means success - that - ** a mapping has been established to a shared-memory segment that is actively - ** maintained by a writer. SQLITE_READONLY_CANTINIT means that all - ** all connections to the -shm file are read-only and hence the content - ** of the -shm file might be out-of-date. - ** - ** The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint - ** from taking place. But it does not prevent the wal from being wrapped - ** if a checkpoint has already taken place. This means that if another - ** client is connected at this point, it may have already checkpointed - ** the entire wal. In that case it would not be safe to continue with - ** the unlocked transaction, as the other client may overwrite wal - ** frames that this client is still using. */ + ** Once sqlite3OsShmMap() has been called for a file and has returned + ** any SQLITE_READONLY value, it must SQLITE_READONLY or + ** SQLITE_READONLY_CANTINIT or some error for all subsequent invocations, + ** until sqlite3OsShmUnmap() has been called. This is a requirement + ** on the VFS implementation. + ** + ** If the shared-memory is now "reliable" return WAL_RETRY, which will + ** cause the heap-memory WAL-index to be discarded and the actual + ** shared memory to be used in its place. + */ rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy); assert( rc!=SQLITE_OK ); /* SQLITE_OK not possible for read-only connection */ if( rc!=SQLITE_READONLY_CANTINIT ){ rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc); - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } + /* Reach this point only if the real shared-memory is still unreliable. + ** Assume the in-memory WAL-index substitute is correct and load it + ** into pWal->hdr. + */ memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr)); + + /* The WAL_READ_LOCK(0) lock held by this client prevents a checkpoint + ** from taking place. But it does not prevent the wal from being wrapped + ** if a checkpoint has already taken place. This means that if another + ** client is connected at this point, it may have already checkpointed + ** the entire wal. In that case it would not be safe to continue with + ** the this transaction, as the other client may overwrite wal + ** frames that this client is still using. + */ rc = sqlite3OsFileSize(pWal->pWalFd, &szWal); if( rc!=SQLITE_OK ){ - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } if( szWalhdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY); - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } /* Check the salt keys at the start of the wal file still match. */ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){ rc = WAL_RETRY; - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } /* Allocate a buffer to read frames into */ @@ -2265,7 +2292,7 @@ static int walBeginUnlocked(Wal *pWal, int *pChanged){ aFrame = (u8 *)sqlite3_malloc64(szFrame); if( aFrame==0 ){ rc = SQLITE_NOMEM_BKPT; - goto begin_unlocked_out; + goto begin_unreliable_shm_out; } aData = &aFrame[WAL_FRAME_HDRSIZE]; @@ -2298,7 +2325,7 @@ static int walBeginUnlocked(Wal *pWal, int *pChanged){ pWal->hdr.aFrameCksum[0] = aSaveCksum[0]; pWal->hdr.aFrameCksum[1] = aSaveCksum[1]; - begin_unlocked_out: + begin_unreliable_shm_out: sqlite3_free(aFrame); if( rc!=SQLITE_OK ){ int i; @@ -2306,7 +2333,7 @@ static int walBeginUnlocked(Wal *pWal, int *pChanged){ sqlite3_free((void*)pWal->apWiData[i]); pWal->apWiData[i] = 0; } - pWal->bUnlocked = 0; + pWal->bShmUnreliable = 0; sqlite3WalEndReadTransaction(pWal); *pChanged = 1; } @@ -2402,7 +2429,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ if( !useWal ){ assert( rc==SQLITE_OK ); - if( pWal->bUnlocked==0 ){ + if( pWal->bShmUnreliable==0 ){ rc = walIndexReadHdr(pWal, pChanged); } if( rc==SQLITE_BUSY ){ @@ -2433,8 +2460,8 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){ if( rc!=SQLITE_OK ){ return rc; } - else if( pWal->bUnlocked ){ - return walBeginUnlocked(pWal, pChanged); + else if( pWal->bShmUnreliable ){ + return walBeginShmUnreliable(pWal, pChanged); } } @@ -2789,7 +2816,7 @@ int sqlite3WalFindFrame( ** then the WAL is ignored by the reader so return early, as if the ** WAL were empty. */ - if( iLast==0 || (pWal->readLock==0 && pWal->bUnlocked==0) ){ + if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){ *piRead = 0; return SQLITE_OK; } @@ -2852,7 +2879,7 @@ int sqlite3WalFindFrame( { u32 iRead2 = 0; u32 iTest; - assert( pWal->bUnlocked || pWal->minFrame>0 ); + assert( pWal->bShmUnreliable || pWal->minFrame>0 ); for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){ if( walFramePgno(pWal, iTest)==pgno ){ iRead2 = iTest;