diff --git a/manifest b/manifest index a45dddeda6..80de0fa850 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,8 @@ -C Changes\sto\sthe\sway\sone\sof\sthe\sWAL/OOM\stests\sworks. -D 2010-05-31T06:38:35 +-----BEGIN PGP SIGNED MESSAGE----- +Hash: SHA1 + +C The\sincremental\scheckpoint\sfeature\sis\snot\sperfect\syet,\sbut\sit\sis\sworking\nwell\senough\sto\smerge\sit\sinto\sthe\strunk. +D 2010-06-01T15:24:30 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 @@ -111,7 +114,7 @@ F src/auth.c 523da7fb4979469955d822ff9298352d6b31de34 F src/backup.c de9809091b3b99f69e37261c133f7f8b19f6eca6 F src/bitvec.c 06ad2c36a9c3819c0b9cbffec7b15f58d5d834e0 F src/btmutex.c 96a12f50f7a17475155971a241d85ec5171573ff -F src/btree.c d0414a5f09b0cacb64bd60b91c5a3720585925aa +F src/btree.c 5934a9f5a328488cca392766bb841ff41c9083a9 F src/btree.h dd83041eda10c17daf023257c1fc883b5f71f85a F src/btreeInt.h b0c87f6725b06a0aa194a6d25d54b16ce9d6e291 F src/build.c 11100b66fb97638d2d874c1d34d8db90650bb1d7 @@ -148,13 +151,13 @@ F src/mutex_os2.c 20477db50cf3817c2f1cd3eb61e5c177e50231db F src/mutex_unix.c becb8c4e07616abf84650d3687d62a1461d5d9cd F src/mutex_w32.c fb1cf87c5a88b56c7df0d9ddb796ed9641046c3d F src/notify.c cbfa66a836da3a51567209636e6a94059c137930 -F src/os.c 2285265f7e8035ba77e8e8ec93adf3c3c61fc60e -F src/os.h efcc7f0072ae362b44eab8588b43a943da61504e +F src/os.c 1516984144e26734f97748f891f1a04f9e294c2e +F src/os.h 6f604986f0ef0ca288c2330b16051ff70b431e8c F src/os_common.h a8f95b81eca8a1ab8593d23e94f8a35f35d4078f F src/os_os2.c 665876d5eec7585226b0a1cf5e18098de2b2da19 -F src/os_unix.c 683ba91de68419771b13f2b9a3dc0e439147e199 -F src/os_win.c 81dd8f5434b3b73b1f1567a784811601b6437ce3 -F src/pager.c 5d693cc6273c5406a21f1a2afa18309457273549 +F src/os_unix.c 29dd06f4850672326765218e75cb49d7d618c254 +F src/os_win.c f815403c51a2adad30244374c801dd7fd2734567 +F 
src/pager.c acbef227bf158776449907c275c5d9332e4e52f9 F src/pager.h 76466c3a5af56943537f68b1f16567101a0cd1d0 F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf @@ -168,19 +171,19 @@ F src/resolve.c ac5f1a713cd1ae77f08b83cc69581e11bf5ae6f9 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 F src/select.c c03d8a0565febcde8c6a12c5d77d065fddae889b F src/shell.c fd4ccdb37c3b68de0623eb938a649e0990710714 -F src/sqlite.h.in a7d351d5ae015179e7ef97a1060c7213b50efd9b +F src/sqlite.h.in c77dd6f7391c7d780622abd221c49d926d32b3b6 F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89 F src/sqliteInt.h c1ca9bed7c963343f90edaf0ec31b8ff4b43fb01 F src/sqliteLimit.h 196e2f83c3b444c4548fc1874f52f84fdbda40f3 F src/status.c 4df6fe7dce2d256130b905847c6c60055882bdbe F src/table.c 2cd62736f845d82200acfa1287e33feb3c15d62e F src/tclsqlite.c 6bc5fbde634b9cb42b3d29d674fa6cd0c22c0881 -F src/test1.c 7e82b944b123360f6637e0c76699713619fb0742 +F src/test1.c e3e0ad1f0763a1aa6adf7e1c50727718eaa4bd32 F src/test2.c 31f1b9d076b4774a22d2605d0af1f34e14a9a7bd F src/test3.c 4c21700c73a890a47fc685c1097bfb661346ac94 F src/test4.c ad03bb987ddedce928f4258c1e7fa4109a73497d F src/test5.c cc55900118fa4add8ec9cf69fc4225a4662f76b1 -F src/test6.c e524e36b10c4cd8fa08899f6245194045e78edbe +F src/test6.c d3e1a771a7b445ec771e64f2af70df5285da8e4c F src/test7.c 3f2d63e4ccf97f8c2cf1a7fa0a3c8e2e2a354e6e F src/test8.c f959db9a22d882013b64c92753fa793b2ce3bdea F src/test9.c bea1e8cf52aa93695487badedd6e1886c321ea60 @@ -190,7 +193,7 @@ F src/test_backup.c c129c91127e9b46e335715ae2e75756e25ba27de F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2 F src/test_config.c 6210f501d358bde619ae761f06f123529c6ba24f F src/test_demovfs.c da81a5f7785bb352bda7911c332a983ec4f17f27 -F src/test_devsym.c 265e0c6a196e4b9cf880946b031483446a8033c3 +F src/test_devsym.c 709712f5157667410cd0dad1b7b1b54319c122c5 F src/test_func.c 13b582345fb1185a93e46c53310fae8547dcce20 F 
src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2 F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c @@ -201,13 +204,13 @@ F src/test_loadext.c df586c27176e3c2cb2e099c78da67bf14379a56e F src/test_malloc.c 2842c922b8e8d992aba722214952204ca025b411 F src/test_mutex.c ce06b59aca168cd8c520b77159a24352a7469bd3 F src/test_onefile.c 4ce8c753c0240f010f0f2af89604875967d20945 -F src/test_osinst.c 18b342b0979a78788af91e6b48b0fdcf0c4b340c +F src/test_osinst.c 280876b040c19d0a8fcd1852cb94e2f630c59bac F src/test_pcache.c 7bf828972ac0d2403f5cfa4cd14da41f8ebe73d8 F src/test_schema.c 8c06ef9ddb240c7a0fcd31bc221a6a2aade58bf0 F src/test_server.c bbba05c144b5fc4b52ff650a4328027b3fa5fcc6 F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa F src/test_thread.c aa9919c885a1fe53eafc73492f0898ee6c0a0726 -F src/test_vfs.c a2d320ea9afc0d520b68eb4998f789b4f8007bfc +F src/test_vfs.c 4f4f121f7d508101a2b33d166567f4ccd226b5ad F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/tokenize.c 25ceb0f0a746ea1d0f9553787f3f0a56853cfaeb F src/trigger.c 8927588cb9e6d47f933b53bfe74200fbb504100d @@ -215,7 +218,7 @@ F src/update.c 9859f2056c7739a1db0d9774ccb6c2f0cee6d1de F src/utf.c 1baeeac91707a4df97ccc6141ec0f808278af685 F src/util.c 32aebf04c10e51ad3977a928b7416bed671b620b F src/vacuum.c b17355fc10cef0875626932ec2f1fa1deb0daa48 -F src/vdbe.c 950cd4e9dfeec3066251897c1ff89331df38b625 +F src/vdbe.c 965247d966bb5bc9db819e27c076c8acd43ea4fd F src/vdbe.h 471f6a3dcec4817ca33596fe7f6654d56c0e75f3 F src/vdbeInt.h 19ebc8c2a2e938340051ee65af3f377fb99102d1 F src/vdbeapi.c dc3138f10afbc95ed3c21dd25abb154504b1db9d @@ -224,8 +227,8 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1 F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2 F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda -F src/wal.c ce631adb92c0c53d87bb86913dc6714cca825e3a -F src/wal.h 111c6f3efd83fe2fc707b29e26431e8eff4c6f28 +F src/wal.c 
131a5eaa59935cb3792ceed95a2b161a862c63f6 +F src/wal.h 1c1c9feb629b7f4afcbe0b47f80f47c5551d3a02 F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f F src/where.c 75fee9e255b62f773fcadd1d1f25b6f63ac7a356 F test/aggerror.test a867e273ef9e3d7919f03ef4f0e8c0d2767944f2 @@ -347,7 +350,7 @@ F test/exclusive.test 4d8a112d6c5bf52014e9383c25ff193cc4f67185 F test/exclusive2.test 6bdf254770a843c2933b54bee9ed239934f0a183 F test/exec.test e949714dc127eaa5ecc7d723efec1ec27118fdd7 F test/expr.test 9f521ae22f00e074959f72ce2e55d46b9ed23f68 -F test/filectrl.test 8923a6dc7630f31c8a9dd3d3d740aa0922df7bf8 +F test/filectrl.test 97003734290887566e01dded09dc9e99cb937e9e F test/filefmt.test 84e3d0fe9f12d0d2ac852465c6f8450aea0d6f43 F test/fkey1.test 01c7de578e11747e720c2d9aeef27f239853c4da F test/fkey2.test 098c06c139a79f690301a43511cd1f6420ae5433 @@ -482,7 +485,7 @@ F test/lock4.test f4f36271aa5ae1da449646bf43c7341f6b2b4c4e F test/lock5.test 6b1f78f09ad1522843dad571b76b321e6f439bf7 F test/lock6.test 862aa71e97b288d6b3f92ba3313f51bd0b003776 F test/lock7.test 64006c84c1c616657e237c7ad6532b765611cf64 -F test/lock_common.tcl bbc4e15ee5334cc4d01fcac08d7c9de7d8906e55 +F test/lock_common.tcl 69d6b67f2ba63968ec2173bcd7310c5c7eca00a2 F test/lookaside.test 1dd350dc6dff015c47c07fcc5a727a72fc5bae02 F test/main.test 2be2352ac77ac5b238c6337a5469aeeef57677e6 F test/make-where7.tcl 05c16b5d4f5d6512881dfec560cb793915932ef9 @@ -761,8 +764,8 @@ F test/vtabE.test 7c4693638d7797ce2eda17af74292b97e705cc61 F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d -F test/wal.test be8ef043253ca735ffcabb92a7dac2d79ebfe8c1 -F test/wal2.test c58bb5b2fac48b8393909038ced730df5ad973b0 +F test/wal.test a54d9be9e82eede1653f7998723ead8ce8a1a580 +F test/wal2.test a7c0265ecea25e95262d6448dc4fbe3fbe94cf0d F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432 F test/walcksum.test 
4efa8fb88c32bed8288ea4385a9cc113a5c8f0bf F test/walcrash.test f6d5fb2bb108876f04848720a488065d9deef69f @@ -771,7 +774,7 @@ F test/walfault.test 690350d02057409b695a3694f048780f2c5e21f4 F test/walhook.test 67e675127f4acb72f061a12667ce6e5460b06b78 F test/walmode.test 6ca9d710cc9f6545b913abcded6d6b0b15641048 F test/walslow.test d21625e2e99e11c032ce949e8a94661576548933 -F test/walthread.test 91ccfe5e04192d2c3fc252d82b28e110a81d0d2e +F test/walthread.test a25a393c068a2b42b44333fa3fdaae9072f1617c F test/where.test de337a3fe0a459ec7c93db16a519657a90552330 F test/where2.test 43d4becaf5a5df854e6c21d624a1cb84c6904554 F test/where3.test aa44a9b29e8c9f3d7bb94a3bb3a95b31627d520d @@ -815,7 +818,14 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f -P e9e5b1001986348ef0f88c19de87b94559a5451e -R f3f15c95e56f90189266a174fb626d53 -U dan -Z 4fade6b535eae7f88d92509f7fe17c62 +P 15abbc34168f7a5bd418254c2b16aac97029e6ea f4b9003a2d3db88eaabb4b291e6cea8e8ea6ff51 +R 858d8d9dc0b3efa473b8ec6d14fcb8f5 +U drh +Z a6bb738fb568ab52924e606c6a5ae6d0 +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.6 (GNU/Linux) + +iD8DBQFMBSYyoxKgR168RlERAlrVAJ9V4XzYgJAVXjVRCX0qxlsuSjehHgCdFXEg +GoCrBmurJgvyGZW2vBLkX4A= +=Giju +-----END PGP SIGNATURE----- diff --git a/manifest.uuid b/manifest.uuid index e3f39cee60..593dda5046 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -15abbc34168f7a5bd418254c2b16aac97029e6ea \ No newline at end of file +1d3e569e59ba89cc167f0a48951ecd82f10322ba \ No newline at end of file diff --git a/src/btree.c b/src/btree.c index eace01fff1..1e1b6f19ea 100644 --- a/src/btree.c +++ b/src/btree.c @@ -2546,7 +2546,7 @@ int sqlite3BtreeBeginTrans(Btree *p, int wrflag){ if( rc!=SQLITE_OK ){ unlockBtreeIfUnused(pBt); } - }while( rc==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE 
&& + }while( (rc&0xFF)==SQLITE_BUSY && pBt->inTransaction==TRANS_NONE && btreeInvokeBusyHandler(pBt) ); if( rc==SQLITE_OK ){ diff --git a/src/os.c b/src/os.c index 0593654007..0b17a6b633 100644 --- a/src/os.c +++ b/src/os.c @@ -110,8 +110,8 @@ int sqlite3OsShmGet(sqlite3_file *id,int reqSize,int *pSize,void volatile **pp){ int sqlite3OsShmRelease(sqlite3_file *id){ return id->pMethods->xShmRelease(id); } -int sqlite3OsShmLock(sqlite3_file *id, int desiredLock, int *pGotLock){ - return id->pMethods->xShmLock(id, desiredLock, pGotLock); +int sqlite3OsShmLock(sqlite3_file *id, int offset, int n, int flags){ + return id->pMethods->xShmLock(id, offset, n, flags); } void sqlite3OsShmBarrier(sqlite3_file *id){ id->pMethods->xShmBarrier(id); diff --git a/src/os.h b/src/os.h index ba00c44ed2..670ee43d3b 100644 --- a/src/os.h +++ b/src/os.h @@ -247,7 +247,7 @@ int sqlite3OsShmOpen(sqlite3_file *id); int sqlite3OsShmSize(sqlite3_file *id, int, int*); int sqlite3OsShmGet(sqlite3_file *id, int, int*, void volatile**); int sqlite3OsShmRelease(sqlite3_file *id); -int sqlite3OsShmLock(sqlite3_file *id, int, int*); +int sqlite3OsShmLock(sqlite3_file *id, int, int, int); void sqlite3OsShmBarrier(sqlite3_file *id); int sqlite3OsShmClose(sqlite3_file *id, int); diff --git a/src/os_unix.c b/src/os_unix.c index 227a3c5181..868071214f 100644 --- a/src/os_unix.c +++ b/src/os_unix.c @@ -2066,7 +2066,7 @@ static int semClose(sqlite3_file *id) { semUnlock(id, NO_LOCK); assert( pFile ); unixEnterMutex(); - releaseLockInfo(pFile->pInode); + releaseInodeInfo(pFile->pInode); unixLeaveMutex(); closeUnixFile(id); } @@ -2533,7 +2533,7 @@ static int afpClose(sqlite3_file *id) { */ setPendingFd(pFile); } - releaseLockInfo(pFile->pInode); + releaseInodeInfo(pFile->pInode); sqlite3_free(pFile->lockingContext); rc = closeUnixFile(id); unixLeaveMutex(); @@ -3168,67 +3168,23 @@ struct unixShmNode { struct unixShm { unixShmNode *pShmNode; /* The underlying unixShmNode object */ unixShm *pNext; /* Next 
unixShm with the same unixShmNode */ - u8 lockState; /* Current lock state */ u8 hasMutex; /* True if holding the unixShmNode mutex */ u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ - u8 sharedMask; /* Mask of shared locks held */ - u8 exclMask; /* Mask of exclusive locks held */ + u16 sharedMask; /* Mask of shared locks held */ + u16 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_DEBUG u8 id; /* Id of this connection within its unixShmNode */ #endif }; -/* -** Size increment by which shared memory grows -*/ -#define SQLITE_UNIX_SHM_INCR 4096 - /* ** Constants used for locking */ -#define UNIX_SHM_BASE 80 /* Byte offset of the first lock byte */ -#define UNIX_SHM_DMS 0x01 /* Mask for Dead-Man-Switch lock */ -#define UNIX_SHM_A 0x10 /* Mask for region locks... */ -#define UNIX_SHM_B 0x20 -#define UNIX_SHM_C 0x40 -#define UNIX_SHM_D 0x80 - -#ifdef SQLITE_DEBUG -/* -** Return a pointer to a nul-terminated string in static memory that -** describes a locking mask. The string is of the form "MSABCD" with -** each character representing a lock. "M" for MUTEX, "S" for DMS, -** and "A" through "D" for the region locks. If a lock is held, the -** letter is shown. If the lock is not held, the letter is converted -** to ".". -** -** This routine is for debugging purposes only and does not appear -** in a production build. -*/ -static const char *unixShmLockString(u8 mask){ - static char zBuf[48]; - static int iBuf = 0; - char *z; - - z = &zBuf[iBuf]; - iBuf += 8; - if( iBuf>=sizeof(zBuf) ) iBuf = 0; - - z[0] = (mask & UNIX_SHM_DMS) ? 'S' : '.'; - z[1] = (mask & UNIX_SHM_A) ? 'A' : '.'; - z[2] = (mask & UNIX_SHM_B) ? 'B' : '.'; - z[3] = (mask & UNIX_SHM_C) ? 'C' : '.'; - z[4] = (mask & UNIX_SHM_D) ? 
'D' : '.'; - z[5] = 0; - return z; -} -#endif /* SQLITE_DEBUG */ +#define UNIX_SHM_BASE ((18+SQLITE_SHM_NLOCK)*4) /* first lock byte */ +#define UNIX_SHM_DMS (UNIX_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ /* -** Apply posix advisory locks for all bytes identified in lockMask. -** -** lockMask might contain multiple bits but all bits are guaranteed -** to be contiguous. +** Apply posix advisory locks for all bytes from ofst through ofst+n-1. ** ** Locks block if the mask is exactly UNIX_SHM_C and are non-blocking ** otherwise. @@ -3236,198 +3192,69 @@ static const char *unixShmLockString(u8 mask){ static int unixShmSystemLock( unixShmNode *pShmNode, /* Apply locks to this open shared-memory segment */ int lockType, /* F_UNLCK, F_RDLCK, or F_WRLCK */ - u8 lockMask /* Which bytes to lock or unlock */ + int ofst, /* First byte of the locking range */ + int n /* Number of bytes to lock */ ){ struct flock f; /* The posix advisory locking structure */ - int lockOp; /* The opcode for fcntl() */ - int i; /* Offset into the locking byte range */ - int rc; /* Result code form fcntl() */ - u8 mask; /* Mask of bits in lockMask */ + int rc = SQLITE_OK; /* Result code form fcntl() */ /* Access to the unixShmNode object is serialized by the caller */ assert( sqlite3_mutex_held(pShmNode->mutex) || pShmNode->nRef==0 ); + /* Shared locks never span more than one byte */ + assert( n==1 || lockType!=F_RDLCK ); + + /* Locks are within range */ + assert( n>=1 && nh, lockOp, &f); + rc = fcntl(pShmNode->h, F_SETLK, &f); rc = (rc!=(-1)) ? 
SQLITE_OK : SQLITE_BUSY; /* Update the global lock state and do debug tracing */ #ifdef SQLITE_DEBUG + { u16 mask; OSTRACE(("SHM-LOCK ")); + mask = (1<<(ofst+n)) - (1<exclMask &= ~lockMask; - pShmNode->sharedMask &= ~lockMask; + OSTRACE(("unlock %d ok", ofst)); + pShmNode->exclMask &= ~mask; + pShmNode->sharedMask &= ~mask; }else if( lockType==F_RDLCK ){ - OSTRACE(("read-lock ok")); - pShmNode->exclMask &= ~lockMask; - pShmNode->sharedMask |= lockMask; + OSTRACE(("read-lock %d ok", ofst)); + pShmNode->exclMask &= ~mask; + pShmNode->sharedMask |= mask; }else{ assert( lockType==F_WRLCK ); - OSTRACE(("write-lock ok")); - pShmNode->exclMask |= lockMask; - pShmNode->sharedMask &= ~lockMask; + OSTRACE(("write-lock %d ok", ofst)); + pShmNode->exclMask |= mask; + pShmNode->sharedMask &= ~mask; } }else{ if( lockType==F_UNLCK ){ - OSTRACE(("unlock failed")); + OSTRACE(("unlock %d failed", ofst)); }else if( lockType==F_RDLCK ){ OSTRACE(("read-lock failed")); }else{ assert( lockType==F_WRLCK ); - OSTRACE(("write-lock failed")); + OSTRACE(("write-lock %d failed", ofst)); } } - OSTRACE((" - change requested %s - afterwards %s:%s\n", - unixShmLockString(lockMask), - unixShmLockString(pShmNode->sharedMask), - unixShmLockString(pShmNode->exclMask))); + OSTRACE((" - afterwards %03x,%03x\n", + pShmNode->sharedMask, pShmNode->exclMask)); + } #endif return rc; } -/* -** For connection p, unlock all of the locks identified by the unlockMask -** parameter. 
-*/ -static int unixShmUnlock( - unixShmNode *pShmNode, /* The underlying shared-memory file */ - unixShm *p, /* The connection to be unlocked */ - u8 unlockMask /* Mask of locks to be unlocked */ -){ - int rc; /* Result code */ - unixShm *pX; /* For looping over all sibling connections */ - u8 allMask; /* Union of locks held by connections other than "p" */ - - /* Access to the unixShmNode object is serialized by the caller */ - assert( sqlite3_mutex_held(pShmNode->mutex) ); - - /* Compute locks held by sibling connections */ - allMask = 0; - for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ - if( pX==p ) continue; - assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); - allMask |= pX->sharedMask; - } - - /* Unlock the system-level locks */ - if( (unlockMask & allMask)!=unlockMask ){ - rc = unixShmSystemLock(pShmNode, F_UNLCK, unlockMask & ~allMask); - }else{ - rc = SQLITE_OK; - } - - /* Undo the local locks */ - if( rc==SQLITE_OK ){ - p->exclMask &= ~unlockMask; - p->sharedMask &= ~unlockMask; - } - return rc; -} - -/* -** Get reader locks for connection p on all locks in the readMask parameter. -*/ -static int unixShmSharedLock( - unixShmNode *pShmNode, /* The underlying shared-memory file */ - unixShm *p, /* The connection to get the shared locks */ - u8 readMask /* Mask of shared locks to be acquired */ -){ - int rc; /* Result code */ - unixShm *pX; /* For looping over all sibling connections */ - u8 allShared; /* Union of locks held by connections other than "p" */ - - /* Access to the unixShmNode object is serialized by the caller */ - assert( sqlite3_mutex_held(pShmNode->mutex) ); - - /* Find out which shared locks are already held by sibling connections. - ** If any sibling already holds an exclusive lock, go ahead and return - ** SQLITE_BUSY. 
- */ - allShared = 0; - for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ - if( pX==p ) continue; - if( (pX->exclMask & readMask)!=0 ) return SQLITE_BUSY; - allShared |= pX->sharedMask; - } - - /* Get shared locks at the system level, if necessary */ - if( (~allShared) & readMask ){ - rc = unixShmSystemLock(pShmNode, F_RDLCK, readMask); - }else{ - rc = SQLITE_OK; - } - - /* Get the local shared locks */ - if( rc==SQLITE_OK ){ - p->sharedMask |= readMask; - } - return rc; -} - -/* -** For connection p, get an exclusive lock on all locks identified in -** the writeMask parameter. -*/ -static int unixShmExclusiveLock( - unixShmNode *pShmNode, /* The underlying shared-memory file */ - unixShm *p, /* The connection to get the exclusive locks */ - u8 writeMask /* Mask of exclusive locks to be acquired */ -){ - int rc; /* Result code */ - unixShm *pX; /* For looping over all sibling connections */ - - /* Access to the unixShmNode object is serialized by the caller */ - assert( sqlite3_mutex_held(pShmNode->mutex) ); - - /* Make sure no sibling connections hold locks that will block this - ** lock. If any do, return SQLITE_BUSY right away. - */ - for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ - if( pX==p ) continue; - if( (pX->exclMask & writeMask)!=0 ) return SQLITE_BUSY; - if( (pX->sharedMask & writeMask)!=0 ) return SQLITE_BUSY; - } - - /* Get the exclusive locks at the system level. Then if successful - ** also mark the local connection as being locked. - */ - rc = unixShmSystemLock(pShmNode, F_WRLCK, writeMask); - if( rc==SQLITE_OK ){ - p->sharedMask &= ~writeMask; - p->exclMask |= writeMask; - } - return rc; -} /* ** Purge the unixShmNodeList list of all entries with unixShmNode.nRef==0. @@ -3520,13 +3347,13 @@ static int unixShmOpen( ** If not, truncate the file to zero length. 
*/ rc = SQLITE_OK; - if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS)==SQLITE_OK ){ + if( unixShmSystemLock(pShmNode, F_WRLCK, UNIX_SHM_DMS, 1)==SQLITE_OK ){ if( ftruncate(pShmNode->h, 0) ){ rc = SQLITE_IOERR; } } if( rc==SQLITE_OK ){ - rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS); + rc = unixShmSystemLock(pShmNode, F_RDLCK, UNIX_SHM_DMS, 1); } if( rc ) goto shm_open_err; } @@ -3572,10 +3399,6 @@ static int unixShmClose( assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); - /* Verify that the connection being closed holds no locks */ - assert( p->exclMask==0 ); - assert( p->sharedMask==0 ); - /* Remove connection p from the set of connections associated ** with pShmNode */ sqlite3_mutex_enter(pShmNode->mutex); @@ -3583,6 +3406,7 @@ static int unixShmClose( *pp = p->pNext; /* Free the connection p */ + assert( p->hasMutexBuf==0 ); sqlite3_free(p); pDbFd->pShm = 0; sqlite3_mutex_leave(pShmNode->mutex); @@ -3641,6 +3465,27 @@ static int unixShmSize( return rc; } +/* +** Release the lock held on the shared memory segment to that other +** threads are free to resize it if necessary. +** +** If the lock is not currently held, this routine is a harmless no-op. +** +** If the shared-memory object is in lock state RECOVER, then we do not +** really want to release the lock, so in that case too, this routine +** is a no-op. +*/ +static int unixShmRelease(sqlite3_file *fd){ + unixFile *pDbFd = (unixFile*)fd; + unixShm *p = pDbFd->pShm; + + if( p->hasMutexBuf ){ + assert( sqlite3_mutex_notheld(p->pShmNode->mutex) ); + sqlite3_mutex_leave(p->pShmNode->mutexBuf); + p->hasMutexBuf = 0; + } + return SQLITE_OK; +} /* ** Map the shared storage into memory. 
@@ -3687,7 +3532,7 @@ static int unixShmGet( assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); - if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){ + if( p->hasMutexBuf==0 ){ assert( sqlite3_mutex_notheld(pShmNode->mutex) ); sqlite3_mutex_enter(pShmNode->mutexBuf); p->hasMutexBuf = 1; @@ -3714,172 +3559,123 @@ static int unixShmGet( *pNewMapSize = pShmNode->szMap; *ppBuf = pShmNode->pMMapBuf; sqlite3_mutex_leave(pShmNode->mutex); + if( *ppBuf==0 ){ + /* Do not hold the mutex if a NULL pointer is being returned. */ + unixShmRelease(fd); + } return rc; } -/* -** Release the lock held on the shared memory segment to that other -** threads are free to resize it if necessary. -** -** If the lock is not currently held, this routine is a harmless no-op. -** -** If the shared-memory object is in lock state RECOVER, then we do not -** really want to release the lock, so in that case too, this routine -** is a no-op. -*/ -static int unixShmRelease(sqlite3_file *fd){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - - if( p->hasMutexBuf && p->lockState!=SQLITE_SHM_RECOVER ){ - assert( sqlite3_mutex_notheld(p->pShmNode->mutex) ); - sqlite3_mutex_leave(p->pShmNode->mutexBuf); - p->hasMutexBuf = 0; - } - return SQLITE_OK; -} - -/* -** Symbolic names for LOCK states used for debugging. -*/ -#ifdef SQLITE_DEBUG -static const char *azLkName[] = { - "UNLOCK", - "READ", - "READ_FULL", - "WRITE", - "PENDING", - "CHECKPOINT", - "RECOVER" -}; -#endif - /* ** Change the lock state for a shared-memory segment. +** +** Note that the relationship between SHAREd and EXCLUSIVE locks is a little +** different here than in posix. In xShmLock(), one can go from unlocked +** to shared and back or from unlocked to exclusive and back. But one may +** not go from shared to exclusive or from exclusive to shared. 
*/ static int unixShmLock( sqlite3_file *fd, /* Database file holding the shared memory */ - int desiredLock, /* One of SQLITE_SHM_xxxxx locking states */ - int *pGotLock /* The lock you actually got */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ ){ - unixFile *pDbFd = (unixFile*)fd; - unixShm *p = pDbFd->pShm; - unixShmNode *pShmNode = p->pShmNode; - int rc = SQLITE_PROTOCOL; + unixFile *pDbFd = (unixFile*)fd; /* Connection holding shared memory */ + unixShm *p = pDbFd->pShm; /* The shared memory being locked */ + unixShm *pX; /* For looping over all siblings */ + unixShmNode *pShmNode = p->pShmNode; /* The underlying file iNode */ + int rc = SQLITE_OK; /* Result code */ + u16 mask; /* Mask of locks to take or release */ assert( pShmNode==pDbFd->pInode->pShmNode ); assert( pShmNode->pInode==pDbFd->pInode ); + assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); + assert( n>=1 ); + assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); + assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); - /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never - ** directly requested; they are side effects from requesting - ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively. - */ - assert( desiredLock==SQLITE_SHM_UNLOCK - || desiredLock==SQLITE_SHM_READ - || desiredLock==SQLITE_SHM_WRITE - || desiredLock==SQLITE_SHM_CHECKPOINT - || desiredLock==SQLITE_SHM_RECOVER ); - - /* Return directly if this is just a lock state query, or if - ** the connection is already in the desired locking state. 
- */ - if( desiredLock==p->lockState - || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL) - ){ - OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s and got %s\n", - p->id, getpid(), azLkName[desiredLock], azLkName[p->lockState])); - if( pGotLock ) *pGotLock = p->lockState; - return SQLITE_OK; - } - - OSTRACE(("SHM-LOCK shmid-%d, pid-%d request %s->%s\n", - p->id, getpid(), azLkName[p->lockState], azLkName[desiredLock])); - - if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){ - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_enter(pShmNode->mutexBuf); - p->hasMutexBuf = 1; - } + mask = (1<<(ofst+n)) - (1<1 || mask==(1<mutex); - switch( desiredLock ){ - case SQLITE_SHM_UNLOCK: { - assert( p->lockState!=SQLITE_SHM_RECOVER ); - unixShmUnlock(pShmNode, p, UNIX_SHM_A|UNIX_SHM_B|UNIX_SHM_C|UNIX_SHM_D); + if( flags & SQLITE_SHM_UNLOCK ){ + u16 allMask = 0; /* Mask of locks held by siblings */ + + /* See if any siblings hold this same lock */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( pX==p ) continue; + assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); + allMask |= pX->sharedMask; + } + + /* Unlock the system-level locks */ + if( (mask & allMask)==0 ){ + rc = unixShmSystemLock(pShmNode, F_UNLCK, ofst+UNIX_SHM_BASE, n); + }else{ rc = SQLITE_OK; - p->lockState = SQLITE_SHM_UNLOCK; - break; } - case SQLITE_SHM_READ: { - if( p->lockState==SQLITE_SHM_UNLOCK ){ - int nAttempt; + + /* Undo the local locks */ + if( rc==SQLITE_OK ){ + p->exclMask &= ~mask; + p->sharedMask &= ~mask; + } + }else if( flags & SQLITE_SHM_SHARED ){ + u16 allShared = 0; /* Union of locks held by connections other than "p" */ + + /* Find out which shared locks are already held by sibling connections. + ** If any sibling already holds an exclusive lock, go ahead and return + ** SQLITE_BUSY. 
+ */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 ){ rc = SQLITE_BUSY; - assert( p->lockState==SQLITE_SHM_UNLOCK ); - for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){ - rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_A|UNIX_SHM_B); - if( rc==SQLITE_BUSY ){ - rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_D); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_READ_FULL; - } - }else{ - unixShmUnlock(pShmNode, p, UNIX_SHM_B); - p->lockState = SQLITE_SHM_READ; - } - } + break; + } + allShared |= pX->sharedMask; + } + + /* Get shared locks at the system level, if necessary */ + if( rc==SQLITE_OK ){ + if( (allShared & mask)==0 ){ + rc = unixShmSystemLock(pShmNode, F_RDLCK, ofst+UNIX_SHM_BASE, n); }else{ - assert( p->lockState==SQLITE_SHM_WRITE - || p->lockState==SQLITE_SHM_RECOVER ); - rc = unixShmSharedLock(pShmNode, p, UNIX_SHM_A); - unixShmUnlock(pShmNode, p, UNIX_SHM_C|UNIX_SHM_D); - p->lockState = SQLITE_SHM_READ; + rc = SQLITE_OK; } - break; } - case SQLITE_SHM_WRITE: { - assert( p->lockState==SQLITE_SHM_READ - || p->lockState==SQLITE_SHM_READ_FULL ); - rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_C|UNIX_SHM_D); + + /* Get the local shared locks */ + if( rc==SQLITE_OK ){ + p->sharedMask |= mask; + } + }else{ + /* Make sure no sibling connections hold locks that will block this + ** lock. If any do, return SQLITE_BUSY right away. + */ + for(pX=pShmNode->pFirst; pX; pX=pX->pNext){ + if( (pX->exclMask & mask)!=0 || (pX->sharedMask & mask)!=0 ){ + rc = SQLITE_BUSY; + break; + } + } + + /* Get the exclusive locks at the system level. Then if successful + ** also mark the local connection as being locked. 
+ */ + if( rc==SQLITE_OK ){ + rc = unixShmSystemLock(pShmNode, F_WRLCK, ofst+UNIX_SHM_BASE, n); if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_WRITE; + assert( (p->sharedMask & mask)==0 ); + p->exclMask |= mask; } - break; - } - case SQLITE_SHM_CHECKPOINT: { - assert( p->lockState==SQLITE_SHM_UNLOCK - || p->lockState==SQLITE_SHM_PENDING - ); - if( p->lockState==SQLITE_SHM_UNLOCK ){ - rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_B|UNIX_SHM_C); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_PENDING; - } - } - if( p->lockState==SQLITE_SHM_PENDING ){ - rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_A); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_CHECKPOINT; - } - } - break; - } - default: { - assert( desiredLock==SQLITE_SHM_RECOVER ); - assert( p->lockState==SQLITE_SHM_READ - || p->lockState==SQLITE_SHM_READ_FULL - ); - assert( sqlite3_mutex_held(pShmNode->mutexBuf) ); - rc = unixShmExclusiveLock(pShmNode, p, UNIX_SHM_C); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_RECOVER; - } - break; } } sqlite3_mutex_leave(pShmNode->mutex); - OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %s\n", - p->id, getpid(), azLkName[p->lockState])); - if( pGotLock ) *pGotLock = p->lockState; + OSTRACE(("SHM-LOCK shmid-%d, pid-%d got %03x,%03x\n", + p->id, getpid(), p->sharedMask, p->exclMask)); return rc; } @@ -3892,12 +3688,8 @@ static int unixShmLock( static void unixShmBarrier( sqlite3_file *fd /* Database file holding the shared memory */ ){ -#ifdef __GNUC__ - __sync_synchronize(); -#else - unixMutexEnter(); - unixMutexLeave(); -#endif + unixEnterMutex(); + unixLeaveMutex(); } diff --git a/src/os_win.c b/src/os_win.c index fdcdda85f1..faff42033d 100644 --- a/src/os_win.c +++ b/src/os_win.c @@ -1227,8 +1227,6 @@ struct winShmNode { winShm *pFirst; /* All winShm objects pointing to this */ winShmNode *pNext; /* Next in list of all winShmNode objects */ #ifdef SQLITE_DEBUG - u8 exclMask; /* Mask of exclusive locks held */ - u8 sharedMask; /* Mask of shared locks held */ u8 
nextShmId; /* Next available winShm.id value */ #endif }; @@ -1253,70 +1251,21 @@ static winShmNode *winShmNodeList = 0; struct winShm { winShmNode *pShmNode; /* The underlying winShmNode object */ winShm *pNext; /* Next winShm with the same winShmNode */ - u8 lockState; /* Current lock state */ u8 hasMutex; /* True if holding the winShmNode mutex */ u8 hasMutexBuf; /* True if holding pFile->mutexBuf */ - u8 sharedMask; /* Mask of shared locks held */ - u8 exclMask; /* Mask of exclusive locks held */ #ifdef SQLITE_DEBUG u8 id; /* Id of this connection with its winShmNode */ #endif }; -/* -** Size increment by which shared memory grows -*/ -#define SQLITE_WIN_SHM_INCR 4096 - /* ** Constants used for locking */ -#define WIN_SHM_BASE 80 /* Byte offset of the first lock byte */ -#define WIN_SHM_DMS 0x01 /* Mask for Dead-Man-Switch lock */ -#define WIN_SHM_A 0x10 /* Mask for region locks... */ -#define WIN_SHM_B 0x20 -#define WIN_SHM_C 0x40 -#define WIN_SHM_D 0x80 - -#ifdef SQLITE_DEBUG -/* -** Return a pointer to a nul-terminated string in static memory that -** describes a locking mask. The string is of the form "MSABCD" with -** each character representing a lock. "M" for MUTEX, "S" for DMS, -** and "A" through "D" for the region locks. If a lock is held, the -** letter is shown. If the lock is not held, the letter is converted -** to ".". -** -** This routine is for debugging purposes only and does not appear -** in a production build. -*/ -static const char *winShmLockString(u8 mask){ - static char zBuf[48]; - static int iBuf = 0; - char *z; - - z = &zBuf[iBuf]; - iBuf += 8; - if( iBuf>=sizeof(zBuf) ) iBuf = 0; - - z[0] = (mask & WIN_SHM_DMS) ? 'S' : '.'; - z[1] = (mask & WIN_SHM_A) ? 'A' : '.'; - z[2] = (mask & WIN_SHM_B) ? 'B' : '.'; - z[3] = (mask & WIN_SHM_C) ? 'C' : '.'; - z[4] = (mask & WIN_SHM_D) ? 
'D' : '.'; - z[5] = 0; - return z; -} -#endif /* SQLITE_DEBUG */ +#define WIN_SHM_BASE ((18+SQLITE_SHM_NLOCK)*4) /* first lock byte */ +#define WIN_SHM_DMS (WIN_SHM_BASE+SQLITE_SHM_NLOCK) /* deadman switch */ /* -** Apply posix advisory locks for all bytes identified in lockMask. -** -** lockMask might contain multiple bits but all bits are guaranteed -** to be contiguous. -** -** Locks block if the mask is exactly WIN_SHM_C and are non-blocking -** otherwise. +** Apply advisory locks for all n bytes beginning at ofst. */ #define _SHM_UNLCK 1 #define _SHM_RDLCK 2 @@ -1324,235 +1273,38 @@ static const char *winShmLockString(u8 mask){ static int winShmSystemLock( winShmNode *pFile, /* Apply locks to this open shared-memory segment */ int lockType, /* _SHM_UNLCK, _SHM_RDLCK, or _SHM_WRLCK */ - u8 lockMask /* Which bytes to lock or unlock */ + int ofst, /* Offset to first byte to be locked/unlocked */ + int nByte /* Number of bytes to lock or unlock */ ){ OVERLAPPED ovlp; DWORD dwFlags; - int nBytes; /* Number of bytes to lock */ - int i; /* Offset into the locking byte range */ int rc = 0; /* Result code form Lock/UnlockFileEx() */ - u8 mask; /* Mask of bits in lockMask */ /* Access to the winShmNode object is serialized by the caller */ assert( sqlite3_mutex_held(pFile->mutex) || pFile->nRef==0 ); /* Initialize the locking parameters */ - if( lockMask==WIN_SHM_C && lockType!=_SHM_UNLCK ){ - dwFlags = 0; - OSTRACE(("SHM-LOCK %d requesting blocking lock %s\n", - pFile->hFile.h, - winShmLockString(lockMask))); - }else{ - dwFlags = LOCKFILE_FAIL_IMMEDIATELY; - OSTRACE(("SHM-LOCK %d requesting %s %s\n", - pFile->hFile.h, - lockType!=_SHM_UNLCK ? 
"lock" : "unlock", - winShmLockString(lockMask))); - } + dwFlags = LOCKFILE_FAIL_IMMEDIATELY; if( lockType == _SHM_WRLCK ) dwFlags |= LOCKFILE_EXCLUSIVE_LOCK; /* Find the first bit in lockMask that is set */ - for(i=0, mask=0x01; mask!=0 && (lockMask&mask)==0; mask <<= 1, i++){} - assert( mask!=0 ); memset(&ovlp, 0, sizeof(OVERLAPPED)); - ovlp.Offset = i+WIN_SHM_BASE; - nBytes = 1; - - /* Extend the locking range for each additional bit that is set */ - mask <<= 1; - while( mask!=0 && (lockMask & mask)!=0 ){ - nBytes++; - mask <<= 1; - } - - /* Verify that all bits set in lockMask are contiguous */ - assert( mask==0 || (lockMask & ~(mask | (mask-1)))==0 ); + ovlp.Offset = ofst; /* Release/Acquire the system-level lock */ if( lockType==_SHM_UNLCK ){ - for(i=0; ihFile.h, 0, 1, 0, &ovlp); - if( !rc ) break; - } + rc = UnlockFileEx(pFile->hFile.h, 0, nByte, 0, &ovlp); }else{ - /* release old individual byte locks (if any) - ** and set new individual byte locks */ - for(i=0; ihFile.h, 0, 1, 0, &ovlp); - rc = LockFileEx(pFile->hFile.h, dwFlags, 0, 1, 0, &ovlp); - if( !rc ) break; - } + rc = LockFileEx(pFile->hFile.h, dwFlags, 0, nByte, 0, &ovlp); } if( !rc ){ OSTRACE(("SHM-LOCK %d %s ERROR 0x%08lx\n", pFile->hFile.h, lockType==_SHM_UNLCK ? "UnlockFileEx" : "LockFileEx", GetLastError())); - /* release individual byte locks (if any) */ - ovlp.Offset-=i; - for(i=0; ihFile.h, 0, 1, 0, &ovlp); - } } rc = (rc!=0) ? 
SQLITE_OK : SQLITE_BUSY; - /* Update the global lock state and do debug tracing */ -#ifdef SQLITE_DEBUG - OSTRACE(("SHM-LOCK %d ", pFile->hFile.h)); - if( rc==SQLITE_OK ){ - if( lockType==_SHM_UNLCK ){ - OSTRACE(("unlock ok")); - pFile->exclMask &= ~lockMask; - pFile->sharedMask &= ~lockMask; - }else if( lockType==_SHM_RDLCK ){ - OSTRACE(("read-lock ok")); - pFile->exclMask &= ~lockMask; - pFile->sharedMask |= lockMask; - }else{ - assert( lockType==_SHM_WRLCK ); - OSTRACE(("write-lock ok")); - pFile->exclMask |= lockMask; - pFile->sharedMask &= ~lockMask; - } - }else{ - if( lockType==_SHM_UNLCK ){ - OSTRACE(("unlock failed")); - }else if( lockType==_SHM_RDLCK ){ - OSTRACE(("read-lock failed")); - }else{ - assert( lockType==_SHM_WRLCK ); - OSTRACE(("write-lock failed")); - } - } - OSTRACE((" - change requested %s - afterwards %s:%s\n", - winShmLockString(lockMask), - winShmLockString(pFile->sharedMask), - winShmLockString(pFile->exclMask))); -#endif - - return rc; -} - -/* -** For connection p, unlock all of the locks identified by the unlockMask -** parameter. 
-*/ -static int winShmUnlock( - winShmNode *pFile, /* The underlying shared-memory file */ - winShm *p, /* The connection to be unlocked */ - u8 unlockMask /* Mask of locks to be unlocked */ -){ - int rc; /* Result code */ - winShm *pX; /* For looping over all sibling connections */ - u8 allMask; /* Union of locks held by connections other than "p" */ - - /* Access to the winShmNode object is serialized by the caller */ - assert( sqlite3_mutex_held(pFile->mutex) ); - - /* don't attempt to unlock anything we don't have locks for */ - if( (unlockMask & (p->exclMask|p->sharedMask)) != unlockMask ){ - OSTRACE(("SHM-LOCK %d unlocking more than we have locked - requested %s - have %s\n", - pFile->hFile.h, - winShmLockString(unlockMask), - winShmLockString(p->exclMask|p->sharedMask))); - unlockMask &= (p->exclMask|p->sharedMask); - } - - /* Compute locks held by sibling connections */ - allMask = 0; - for(pX=pFile->pFirst; pX; pX=pX->pNext){ - if( pX==p ) continue; - assert( (pX->exclMask & (p->exclMask|p->sharedMask))==0 ); - allMask |= pX->sharedMask; - } - - /* Unlock the system-level locks */ - if( (unlockMask & allMask)!=unlockMask ){ - rc = winShmSystemLock(pFile, _SHM_UNLCK, unlockMask & ~allMask); - }else{ - rc = SQLITE_OK; - } - - /* Undo the local locks */ - if( rc==SQLITE_OK ){ - p->exclMask &= ~unlockMask; - p->sharedMask &= ~unlockMask; - } - return rc; -} - -/* -** Get reader locks for connection p on all locks in the readMask parameter. 
-*/ -static int winShmSharedLock( - winShmNode *pFile, /* The underlying shared-memory file */ - winShm *p, /* The connection to get the shared locks */ - u8 readMask /* Mask of shared locks to be acquired */ -){ - int rc; /* Result code */ - winShm *pX; /* For looping over all sibling connections */ - u8 allShared; /* Union of locks held by connections other than "p" */ - - /* Access to the winShmNode object is serialized by the caller */ - assert( sqlite3_mutex_held(pFile->mutex) ); - - /* Find out which shared locks are already held by sibling connections. - ** If any sibling already holds an exclusive lock, go ahead and return - ** SQLITE_BUSY. - */ - allShared = 0; - for(pX=pFile->pFirst; pX; pX=pX->pNext){ - if( pX==p ) continue; - if( (pX->exclMask & readMask)!=0 ) return SQLITE_BUSY; - allShared |= pX->sharedMask; - } - - /* Get shared locks at the system level, if necessary */ - if( (~allShared) & readMask ){ - rc = winShmSystemLock(pFile, _SHM_RDLCK, readMask); - }else{ - rc = SQLITE_OK; - } - - /* Get the local shared locks */ - if( rc==SQLITE_OK ){ - p->sharedMask |= readMask; - } - return rc; -} - -/* -** For connection p, get an exclusive lock on all locks identified in -** the writeMask parameter. -*/ -static int winShmExclusiveLock( - winShmNode *pFile, /* The underlying shared-memory file */ - winShm *p, /* The connection to get the exclusive locks */ - u8 writeMask /* Mask of exclusive locks to be acquired */ -){ - int rc; /* Result code */ - winShm *pX; /* For looping over all sibling connections */ - - /* Access to the winShmNode object is serialized by the caller */ - assert( sqlite3_mutex_held(pFile->mutex) ); - - /* Make sure no sibling connections hold locks that will block this - ** lock. If any do, return SQLITE_BUSY right away. 
- */ - for(pX=pFile->pFirst; pX; pX=pX->pNext){ - if( pX==p ) continue; - if( (pX->exclMask & writeMask)!=0 ) return SQLITE_BUSY; - if( (pX->sharedMask & writeMask)!=0 ) return SQLITE_BUSY; - } - - /* Get the exclusive locks at the system level. Then if successful - ** also mark the local connection as being locked. - */ - rc = winShmSystemLock(pFile, _SHM_WRLCK, writeMask); - if( rc==SQLITE_OK ){ - p->sharedMask &= ~writeMask; - p->exclMask |= writeMask; - } return rc; } @@ -1680,11 +1432,12 @@ static int winShmOpen( /* Check to see if another process is holding the dead-man switch. ** If not, truncate the file to zero length. */ - if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS)==SQLITE_OK ){ + if( winShmSystemLock(pShmNode, _SHM_WRLCK, WIN_SHM_DMS, 1)==SQLITE_OK ){ rc = winTruncate((sqlite3_file *)&pShmNode->hFile, 0); } if( rc==SQLITE_OK ){ - rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS); + winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1); + rc = winShmSystemLock(pShmNode, _SHM_RDLCK, WIN_SHM_DMS, 1); } if( rc ) goto shm_open_err; } @@ -1703,7 +1456,7 @@ static int winShmOpen( /* Jump here on any error */ shm_open_err: - winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS); + winShmSystemLock(pShmNode, _SHM_UNLCK, WIN_SHM_DMS, 1); winShmPurge(); /* This call frees pShmNode if required */ sqlite3_free(p); sqlite3_free(pNew); @@ -1728,10 +1481,6 @@ static int winShmClose( p = pDbFd->pShm; pShmNode = p->pShmNode; - /* Verify that the connection being closed holds no locks */ - assert( p->exclMask==0 ); - assert( p->sharedMask==0 ); - /* Remove connection p from the set of connections associated ** with pShmNode */ sqlite3_mutex_enter(pShmNode->mutex); @@ -1782,12 +1531,8 @@ static int winShmSize( if( reqSize>=0 ){ sqlite3_int64 sz; rc = winFileSize((sqlite3_file *)&pShmNode->hFile, &sz); - if( SQLITE_OK==rc ){ - reqSize = (reqSize + SQLITE_WIN_SHM_INCR - 1)/SQLITE_WIN_SHM_INCR; - reqSize *= SQLITE_WIN_SHM_INCR; - if( reqSize>sz ){ - rc = 
winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize); - } + if( SQLITE_OK==rc && reqSize>sz ){ + rc = winTruncate((sqlite3_file *)&pShmNode->hFile, reqSize); } } if( SQLITE_OK==rc ){ @@ -1833,14 +1578,14 @@ static int winShmGet( sqlite3_file *fd, /* The database file holding the shared memory */ int reqMapSize, /* Requested size of mapping. -1 means don't care */ int *pNewMapSize, /* Write new size of mapping here */ - void **ppBuf /* Write mapping buffer origin here */ + void volatile **ppBuf /* Write mapping buffer origin here */ ){ winFile *pDbFd = (winFile*)fd; winShm *p = pDbFd->pShm; winShmNode *pShmNode = p->pShmNode; int rc = SQLITE_OK; - if( p->lockState!=SQLITE_SHM_CHECKPOINT && p->hasMutexBuf==0 ){ + if( p->hasMutexBuf==0 ){ assert( sqlite3_mutex_notheld(pShmNode->mutex) ); sqlite3_mutex_enter(pShmNode->mutexBuf); p->hasMutexBuf = 1; @@ -1920,7 +1665,7 @@ static int winShmGet( static int winShmRelease(sqlite3_file *fd){ winFile *pDbFd = (winFile*)fd; winShm *p = pDbFd->pShm; - if( p->hasMutexBuf && p->lockState!=SQLITE_SHM_RECOVER ){ + if( p->hasMutexBuf ){ winShmNode *pShmNode = p->pShmNode; assert( sqlite3_mutex_notheld(pShmNode->mutex) ); sqlite3_mutex_leave(pShmNode->mutexBuf); @@ -1929,149 +1674,39 @@ static int winShmRelease(sqlite3_file *fd){ return SQLITE_OK; } -/* -** Symbolic names for LOCK states used for debugging. -*/ -#ifdef SQLITE_DEBUG -static const char *azLkName[] = { - "UNLOCK", - "READ", - "READ_FULL", - "WRITE", - "PENDING", - "CHECKPOINT", - "RECOVER" -}; -#endif - - /* ** Change the lock state for a shared-memory segment. 
*/ static int winShmLock( - sqlite3_file *fd, /* Database holding the shared memory */ - int desiredLock, /* One of SQLITE_SHM_xxxxx locking states */ - int *pGotLock /* The lock you actually got */ + sqlite3_file *fd, /* Database file holding the shared memory */ + int ofst, /* First lock to acquire or release */ + int n, /* Number of locks to acquire or release */ + int flags /* What to do with the lock */ ){ winFile *pDbFd = (winFile*)fd; winShm *p = pDbFd->pShm; winShmNode *pShmNode = p->pShmNode; int rc = SQLITE_PROTOCOL; - /* Note that SQLITE_SHM_READ_FULL and SQLITE_SHM_PENDING are never - ** directly requested; they are side effects from requesting - ** SQLITE_SHM_READ and SQLITE_SHM_CHECKPOINT, respectively. - */ - assert( desiredLock==SQLITE_SHM_UNLOCK - || desiredLock==SQLITE_SHM_READ - || desiredLock==SQLITE_SHM_WRITE - || desiredLock==SQLITE_SHM_CHECKPOINT - || desiredLock==SQLITE_SHM_RECOVER ); + assert( ofst>=0 && ofst+n<=SQLITE_SHM_NLOCK ); + assert( n>=1 ); + assert( flags==(SQLITE_SHM_LOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED) + || flags==(SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE) ); + assert( n==1 || (flags & SQLITE_SHM_EXCLUSIVE)!=0 ); - /* Return directly if this is just a lock state query, or if - ** the connection is already in the desired locking state. 
- */ - if( desiredLock==p->lockState - || (desiredLock==SQLITE_SHM_READ && p->lockState==SQLITE_SHM_READ_FULL) - ){ - OSTRACE(("SHM-LOCK %d shmid-%d, pid-%d request %s and got %s\n", - pShmNode->hFile.h, - p->id, (int)GetCurrentProcessId(), azLkName[desiredLock], - azLkName[p->lockState])); - if( pGotLock ) *pGotLock = p->lockState; - return SQLITE_OK; - } - - OSTRACE(("SHM-LOCK %d shmid-%d, pid-%d request %s->%s\n", - pShmNode->hFile.h, - p->id, (int)GetCurrentProcessId(), azLkName[p->lockState], - azLkName[desiredLock])); - - if( desiredLock==SQLITE_SHM_RECOVER && !p->hasMutexBuf ){ - assert( sqlite3_mutex_notheld(pShmNode->mutex) ); - sqlite3_mutex_enter(pShmNode->mutexBuf); - p->hasMutexBuf = 1; - } sqlite3_mutex_enter(pShmNode->mutex); - switch( desiredLock ){ - case SQLITE_SHM_UNLOCK: { - assert( p->lockState!=SQLITE_SHM_RECOVER ); - winShmUnlock(pShmNode, p, WIN_SHM_A|WIN_SHM_B|WIN_SHM_C|WIN_SHM_D); - rc = SQLITE_OK; - p->lockState = SQLITE_SHM_UNLOCK; - break; - } - case SQLITE_SHM_READ: { - if( p->lockState==SQLITE_SHM_UNLOCK ){ - int nAttempt; - rc = SQLITE_BUSY; - assert( p->lockState==SQLITE_SHM_UNLOCK ); - for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){ - rc = winShmSharedLock(pShmNode, p, WIN_SHM_A|WIN_SHM_B); - if( rc==SQLITE_BUSY ){ - rc = winShmSharedLock(pShmNode, p, WIN_SHM_D); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_READ_FULL; - } - }else{ - winShmUnlock(pShmNode, p, WIN_SHM_B); - p->lockState = SQLITE_SHM_READ; - } - } - }else{ - assert( p->lockState==SQLITE_SHM_WRITE - || p->lockState==SQLITE_SHM_RECOVER ); - rc = winShmSharedLock(pShmNode, p, WIN_SHM_A); - winShmUnlock(pShmNode, p, WIN_SHM_C|WIN_SHM_D); - p->lockState = SQLITE_SHM_READ; - } - break; - } - case SQLITE_SHM_WRITE: { - assert( p->lockState==SQLITE_SHM_READ - || p->lockState==SQLITE_SHM_READ_FULL ); - rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_C|WIN_SHM_D); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_WRITE; - } - break; - } - case 
SQLITE_SHM_CHECKPOINT: { - assert( p->lockState==SQLITE_SHM_UNLOCK - || p->lockState==SQLITE_SHM_PENDING - ); - if( p->lockState==SQLITE_SHM_UNLOCK ){ - rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_B|WIN_SHM_C); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_PENDING; - } - } - if( p->lockState==SQLITE_SHM_PENDING ){ - rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_A); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_CHECKPOINT; - } - } - break; - } - default: { - assert( desiredLock==SQLITE_SHM_RECOVER ); - assert( p->lockState==SQLITE_SHM_READ - || p->lockState==SQLITE_SHM_READ_FULL - ); - assert( sqlite3_mutex_held(pShmNode->mutexBuf) ); - rc = winShmExclusiveLock(pShmNode, p, WIN_SHM_C); - if( rc==SQLITE_OK ){ - p->lockState = SQLITE_SHM_RECOVER; - } - break; - } + if( flags & SQLITE_SHM_UNLOCK ){ + rc = winShmSystemLock(pShmNode, _SHM_UNLCK, ofst+WIN_SHM_BASE, n); + }else if( flags & SQLITE_SHM_SHARED ){ + rc = winShmSystemLock(pShmNode, _SHM_RDLCK, ofst+WIN_SHM_BASE, n); + }else{ + rc = winShmSystemLock(pShmNode, _SHM_WRLCK, ofst+WIN_SHM_BASE, n); } sqlite3_mutex_leave(pShmNode->mutex); - OSTRACE(("SHM-LOCK %d shmid-%d, pid-%d got %s\n", - pShmNode->hFile.h, - p->id, (int)GetCurrentProcessId(), azLkName[p->lockState])); - if( pGotLock ) *pGotLock = p->lockState; + OSTRACE(("SHM-LOCK shmid-%d, pid-%d %s\n", + p->id, (int)GetCurrentProcessId(), rc ? 
"failed" : "ok")); return rc; } diff --git a/src/pager.c b/src/pager.c index 5a3d35f7f3..3eb1233483 100644 --- a/src/pager.c +++ b/src/pager.c @@ -1203,7 +1203,7 @@ static int pagerUseWal(Pager *pPager){ # define pagerRollbackWal(x) 0 # define pagerWalFrames(v,w,x,y,z) 0 # define pagerOpenWalIfPresent(z) SQLITE_OK -# define pagerOpenSnapshot(z) SQLITE_OK +# define pagerBeginReadTransaction(z) SQLITE_OK #endif /* @@ -1238,7 +1238,7 @@ static void pager_unlock(Pager *pPager){ pPager->dbSizeValid = 0; if( pagerUseWal(pPager) ){ - sqlite3WalCloseSnapshot(pPager->pWal); + sqlite3WalEndReadTransaction(pPager->pWal); }else{ rc = osUnlock(pPager->fd, NO_LOCK); } @@ -1437,7 +1437,7 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ sqlite3PcacheCleanAll(pPager->pPCache); if( pagerUseWal(pPager) ){ - rc2 = sqlite3WalWriteLock(pPager->pWal, 0); + rc2 = sqlite3WalEndWriteTransaction(pPager->pWal); pPager->state = PAGER_SHARED; /* If the connection was in locking_mode=exclusive mode but is no longer, @@ -1445,9 +1445,8 @@ static int pager_end_transaction(Pager *pPager, int hasMaster){ */ if( rc2==SQLITE_OK && !pPager->exclusiveMode - && sqlite3WalExclusiveMode(pPager->pWal, -1) + && sqlite3WalExclusiveMode(pPager->pWal, 0) ){ - sqlite3WalExclusiveMode(pPager->pWal, 0); rc2 = osUnlock(pPager->fd, SHARED_LOCK); } }else if( !pPager->exclusiveMode ){ @@ -2362,15 +2361,27 @@ static int pagerWalFrames( } /* -** Open a WAL snapshot on the log file this pager is connected to. +** Begin a read transaction on the WAL. +** +** This routine used to be called "pagerOpenSnapshot()" because it essentially +** makes a snapshot of the database at the current point in time and preserves +** that snapshot for use by the reader in spite of concurrently changes by +** other writers or checkpointers. 
*/ -static int pagerOpenSnapshot(Pager *pPager){ +static int pagerBeginReadTransaction(Pager *pPager){ int rc; /* Return code */ int changed = 0; /* True if cache must be reset */ assert( pagerUseWal(pPager) ); - rc = sqlite3WalOpenSnapshot(pPager->pWal, &changed); + /* sqlite3WalEndReadTransaction() was not called for the previous + ** transaction in locking_mode=EXCLUSIVE. So call it now. If we + ** are in locking_mode=NORMAL and EndRead() was previously called, + ** the duplicate call is harmless. + */ + sqlite3WalEndReadTransaction(pPager->pWal); + + rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed); if( rc==SQLITE_OK ){ int dummy; if( changed ){ @@ -2428,7 +2439,7 @@ static int pagerOpenWalIfPresent(Pager *pPager){ pager_reset(pPager); rc = sqlite3PagerOpenWal(pPager, 0); if( rc==SQLITE_OK ){ - rc = pagerOpenSnapshot(pPager); + rc = pagerBeginReadTransaction(pPager); } }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){ pPager->journalMode = PAGER_JOURNALMODE_DELETE; @@ -4002,7 +4013,7 @@ int sqlite3PagerSharedLock(Pager *pPager){ } if( pagerUseWal(pPager) ){ - rc = pagerOpenSnapshot(pPager); + rc = pagerBeginReadTransaction(pPager); }else if( pPager->state==PAGER_UNLOCK || isErrorReset ){ sqlite3_vfs * const pVfs = pPager->pVfs; int isHotJournal = 0; @@ -4541,7 +4552,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ /* If the pager is configured to use locking_mode=exclusive, and an ** exclusive lock on the database is not already held, obtain it now. */ - if( pPager->exclusiveMode && !sqlite3WalExclusiveMode(pPager->pWal, -1) ){ + if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){ rc = sqlite3OsLock(pPager->fd, EXCLUSIVE_LOCK); pPager->state = PAGER_SHARED; if( rc!=SQLITE_OK ){ @@ -4561,7 +4572,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){ ** may copy data from the sub-journal into the database file as well ** as into the page cache. Which would be incorrect in WAL mode. 
*/ - rc = sqlite3WalWriteLock(pPager->pWal, 1); + rc = sqlite3WalBeginWriteTransaction(pPager->pWal); if( rc==SQLITE_OK ){ pPager->dbOrigSize = pPager->dbSize; pPager->state = PAGER_RESERVED; @@ -5892,8 +5903,7 @@ int sqlite3PagerCheckpoint(Pager *pPager){ u8 *zBuf = (u8 *)pPager->pTmpSpace; rc = sqlite3WalCheckpoint(pPager->pWal, (pPager->noSync ? 0 : pPager->sync_flags), - pPager->pageSize, zBuf, - pPager->xBusyHandler, pPager->pBusyHandlerArg + pPager->pageSize, zBuf ); } return rc; diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 0256399037..0b931dcc9f 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -444,7 +444,8 @@ int sqlite3_exec( #define SQLITE_IOERR_LOCK (SQLITE_IOERR | (15<<8)) #define SQLITE_IOERR_CLOSE (SQLITE_IOERR | (16<<8)) #define SQLITE_IOERR_DIR_CLOSE (SQLITE_IOERR | (17<<8)) -#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8) ) +#define SQLITE_LOCKED_SHAREDCACHE (SQLITE_LOCKED | (1<<8)) +#define SQLITE_BUSY_RECOVERY (SQLITE_BUSY | (1<<8)) /* ** CAPI3REF: Flags For File Open Operations @@ -658,7 +659,7 @@ struct sqlite3_io_methods { int (*xShmSize)(sqlite3_file*, int reqSize, int *pNewSize); int (*xShmGet)(sqlite3_file*, int reqSize, int *pSize, void volatile**); int (*xShmRelease)(sqlite3_file*); - int (*xShmLock)(sqlite3_file*, int desiredLock, int *gotLock); + int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); void (*xShmBarrier)(sqlite3_file*); int (*xShmClose)(sqlite3_file*, int deleteFlag); /* Methods above are valid for version 2 */ @@ -888,16 +889,40 @@ struct sqlite3_vfs { /* ** CAPI3REF: Flags for the xShmLock VFS method ** -** These integer constants define the various locking states that -** an sqlite3_shm object can be in. +** These integer constants define the various locking operations +** allowed by the xShmLock method of [sqlite3_io_methods]. The +** following are the only legal combinations of flags to the +** xShmLock method: +** +**
    +**
  • SQLITE_SHM_LOCK | SQLITE_SHM_SHARED +**
  • SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE +**
  • SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED +**
  • SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE +**
+** +** When unlocking, the same SHARED or EXCLUSIVE flag must be supplied as +** was given no the corresponding lock. +** +** The xShmLock method can transition between unlocked and SHARED or +** between unlocked and EXCLUSIVE. It cannot transition between SHARED +** and EXCLUSIVE. */ -#define SQLITE_SHM_UNLOCK 0 -#define SQLITE_SHM_READ 1 -#define SQLITE_SHM_READ_FULL 2 -#define SQLITE_SHM_WRITE 3 -#define SQLITE_SHM_PENDING 4 -#define SQLITE_SHM_CHECKPOINT 5 -#define SQLITE_SHM_RECOVER 6 +#define SQLITE_SHM_UNLOCK 1 +#define SQLITE_SHM_LOCK 2 +#define SQLITE_SHM_SHARED 4 +#define SQLITE_SHM_EXCLUSIVE 8 + +/* +** CAPI3REF: Maximum xShmLock index +** +** The xShmLock method on [sqlite3_io_methods] may use values +** between 0 and this upper bound as its "offset" argument. +** The SQLite core will never attempt to acquire or release a +** lock outside of this range +*/ +#define SQLITE_SHM_NLOCK 8 + /* ** CAPI3REF: Initialize The SQLite Library diff --git a/src/test1.c b/src/test1.c index 687c009223..e636bb1b89 100644 --- a/src/test1.c +++ b/src/test1.c @@ -4609,7 +4609,7 @@ static int file_control_lasterrno_test( } /* -** tclcmd: file_control_lockproxy_test DB +** tclcmd: file_control_lockproxy_test DB PWD ** ** This TCL command runs the sqlite3_file_control interface and ** verifies correct operation of the SQLITE_GET_LOCKPROXYFILE and @@ -4622,15 +4622,18 @@ static int file_control_lockproxy_test( Tcl_Obj *CONST objv[] /* Command arguments */ ){ sqlite3 *db; + const char *zPwd; + int nPwd; - if( objc!=2 ){ + if( objc!=3 ){ Tcl_AppendResult(interp, "wrong # args: should be \"", - Tcl_GetStringFromObj(objv[0], 0), " DB", 0); + Tcl_GetStringFromObj(objv[0], 0), " DB PWD", 0); return TCL_ERROR; } if( getDbPointer(interp, Tcl_GetString(objv[1]), &db) ){ return TCL_ERROR; } + zPwd = Tcl_GetStringFromObj(objv[2], &nPwd); #if !defined(SQLITE_ENABLE_LOCKING_STYLE) # if defined(__APPLE__) @@ -4641,9 +4644,15 @@ static int file_control_lockproxy_test( #endif #if 
SQLITE_ENABLE_LOCKING_STYLE && defined(__APPLE__) { - char *proxyPath = "test.proxy"; char *testPath; int rc; + char proxyPath[400]; + + if( sizeof(proxyPath)pRealFile); } -static int cfShmLock(sqlite3_file *pFile, int desired, int *pGot){ - return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, desired, pGot); +static int cfShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ + return sqlite3OsShmLock(((CrashFile*)pFile)->pRealFile, ofst, n, flags); } static void cfShmBarrier(sqlite3_file *pFile){ sqlite3OsShmBarrier(((CrashFile*)pFile)->pRealFile); diff --git a/src/test_devsym.c b/src/test_devsym.c index 196dccf286..0464804939 100644 --- a/src/test_devsym.c +++ b/src/test_devsym.c @@ -54,7 +54,7 @@ static int devsymShmOpen(sqlite3_file*); static int devsymShmSize(sqlite3_file*,int,int*); static int devsymShmGet(sqlite3_file*,int,int*,volatile void**); static int devsymShmRelease(sqlite3_file*); -static int devsymShmLock(sqlite3_file*,int,int*); +static int devsymShmLock(sqlite3_file*,int,int,int); static void devsymShmBarrier(sqlite3_file*); static int devsymShmClose(sqlite3_file*,int); @@ -263,9 +263,9 @@ static int devsymShmRelease(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; return sqlite3OsShmRelease(p->pReal); } -static int devsymShmLock(sqlite3_file *pFile, int desired, int *pGot){ +static int devsymShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ devsym_file *p = (devsym_file *)pFile; - return sqlite3OsShmLock(p->pReal, desired, pGot); + return sqlite3OsShmLock(p->pReal, ofst, n, flags); } static void devsymShmBarrier(sqlite3_file *pFile){ devsym_file *p = (devsym_file *)pFile; diff --git a/src/test_osinst.c b/src/test_osinst.c index 6697fa1ecd..52e04fb9ed 100644 --- a/src/test_osinst.c +++ b/src/test_osinst.c @@ -155,7 +155,7 @@ static int vfslogShmOpen(sqlite3_file *pFile); static int vfslogShmSize(sqlite3_file *pFile, int reqSize, int *pNewSize); static int vfslogShmGet(sqlite3_file *pFile, int,int*,volatile void **); 
static int vfslogShmRelease(sqlite3_file *pFile); -static int vfslogShmLock(sqlite3_file *pFile, int desiredLock, int *gotLock); +static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags); static void vfslogShmBarrier(sqlite3_file*); static int vfslogShmClose(sqlite3_file *pFile, int deleteFlag); @@ -460,12 +460,12 @@ static int vfslogShmRelease(sqlite3_file *pFile){ vfslog_call(p->pVfslog, OS_SHMRELEASE, p->iFileId, t, rc, 0, 0); return rc; } -static int vfslogShmLock(sqlite3_file *pFile, int desiredLock, int *gotLock){ +static int vfslogShmLock(sqlite3_file *pFile, int ofst, int n, int flags){ int rc; sqlite3_uint64 t; VfslogFile *p = (VfslogFile *)pFile; t = vfslog_time(); - rc = p->pReal->pMethods->xShmLock(p->pReal, desiredLock, gotLock); + rc = p->pReal->pMethods->xShmLock(p->pReal, ofst, n, flags); t = vfslog_time() - t; vfslog_call(p->pVfslog, OS_SHMLOCK, p->iFileId, t, rc, 0, 0); return rc; diff --git a/src/test_vfs.c b/src/test_vfs.c index f95587931e..d5e8ea1faf 100644 --- a/src/test_vfs.c +++ b/src/test_vfs.c @@ -102,7 +102,7 @@ static int tvfsShmOpen(sqlite3_file*); static int tvfsShmSize(sqlite3_file*, int , int *); static int tvfsShmGet(sqlite3_file*, int , int *, volatile void **); static int tvfsShmRelease(sqlite3_file*); -static int tvfsShmLock(sqlite3_file*, int , int *); +static int tvfsShmLock(sqlite3_file*, int , int, int); static void tvfsShmBarrier(sqlite3_file*); static int tvfsShmClose(sqlite3_file*, int); @@ -544,31 +544,34 @@ static int tvfsShmRelease(sqlite3_file *pFile){ static int tvfsShmLock( sqlite3_file *pFile, - int desiredLock, - int *gotLock + int ofst, + int n, + int flags ){ int rc = SQLITE_OK; TestvfsFile *pFd = (TestvfsFile *)pFile; Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); - char *zLock = ""; + int nLock; + char zLock[80]; - switch( desiredLock ){ - case SQLITE_SHM_READ: zLock = "READ"; break; - case SQLITE_SHM_WRITE: zLock = "WRITE"; break; - case SQLITE_SHM_CHECKPOINT: zLock = "CHECKPOINT"; break; - 
case SQLITE_SHM_RECOVER: zLock = "RECOVER"; break; - case SQLITE_SHM_PENDING: zLock = "PENDING"; break; - case SQLITE_SHM_UNLOCK: zLock = "UNLOCK"; break; + sqlite3_snprintf(sizeof(zLock), zLock, "%d %d", ofst, n); + nLock = strlen(zLock); + if( flags & SQLITE_SHM_LOCK ){ + strcpy(&zLock[nLock], " lock"); + }else{ + strcpy(&zLock[nLock], " unlock"); + } + nLock += strlen(&zLock[nLock]); + if( flags & SQLITE_SHM_SHARED ){ + strcpy(&zLock[nLock], " shared"); + }else{ + strcpy(&zLock[nLock], " exclusive"); } tvfsExecTcl(p, "xShmLock", Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, Tcl_NewStringObj(zLock, -1) ); tvfsResultCode(p, &rc); - if( rc==SQLITE_OK ){ - *gotLock = desiredLock; - } - return rc; } @@ -716,9 +719,7 @@ static void testvfs_obj_del(ClientData cd){ ** ** SCRIPT xShmLock FILENAME ID LOCK ** -** where LOCK is one of "UNLOCK", "READ", "READ_FULL", "WRITE", "PENDING", -** "CHECKPOINT" or "RECOVER". The script should return an SQLite error -** code. +** where LOCK is of the form "OFFSET NBYTE lock/unlock shared/exclusive" */ static int testvfs_cmd( ClientData cd, diff --git a/src/vdbe.c b/src/vdbe.c index fc2b92f15b..89a8c340be 100644 --- a/src/vdbe.c +++ b/src/vdbe.c @@ -480,22 +480,6 @@ static void registerTrace(FILE *out, int iReg, Mem *p){ #define CHECK_FOR_INTERRUPT \ if( db->u1.isInterrupted ) goto abort_due_to_interrupt; -#ifdef SQLITE_DEBUG -static int fileExists(sqlite3 *db, const char *zFile){ - int res = 0; - int rc = SQLITE_OK; -#ifdef SQLITE_TEST - /* If we are currently testing IO errors, then do not call OsAccess() to - ** test for the presence of zFile. This is because any IO error that - ** occurs here will not be reported, causing the test to fail. 
- */ - extern int sqlite3_io_error_pending; - if( sqlite3_io_error_pending<=0 ) -#endif - rc = sqlite3OsAccess(db->pVfs, zFile, SQLITE_ACCESS_EXISTS, &res); - return (res && rc==SQLITE_OK); -} -#endif #ifndef NDEBUG /* @@ -594,9 +578,7 @@ int sqlite3VdbeExec( #endif #ifdef SQLITE_DEBUG sqlite3BeginBenignMalloc(); - if( p->pc==0 - && ((p->db->flags & SQLITE_VdbeListing) || fileExists(db, "vdbe_explain")) - ){ + if( p->pc==0 && (p->db->flags & SQLITE_VdbeListing)!=0 ){ int i; printf("VDBE Program Listing:\n"); sqlite3VdbePrintSql(p); @@ -604,9 +586,6 @@ int sqlite3VdbeExec( sqlite3VdbePrintOp(stdout, i, &aOp[i]); } } - if( fileExists(db, "vdbe_trace") ){ - p->trace = stdout; - } sqlite3EndBenignMalloc(); #endif for(pc=p->pc; rc==SQLITE_OK; pc++){ @@ -628,13 +607,6 @@ int sqlite3VdbeExec( } sqlite3VdbePrintOp(p->trace, pc, pOp); } - if( p->trace==0 && pc==0 ){ - sqlite3BeginBenignMalloc(); - if( fileExists(db, "vdbe_sqltrace") ){ - sqlite3VdbePrintSql(p); - } - sqlite3EndBenignMalloc(); - } #endif diff --git a/src/wal.c b/src/wal.c index 2bbbdcd5e7..dbac31ae35 100644 --- a/src/wal.c +++ b/src/wal.c @@ -93,12 +93,22 @@ ** ** To read a page from the database (call it page number P), a reader ** first checks the WAL to see if it contains page P. If so, then the -** last valid instance of page P that is or is followed by a commit frame -** become the value read. If the WAL contains no copies of page P that -** are valid and which are or are followed by a commit frame, then page -** P is read from the database file. +** last valid instance of page P that is a followed by a commit frame +** or is a commit frame itself becomes the value read. If the WAL +** contains no copies of page P that are valid and which are a commit +** frame or are followed by a commit frame, then page P is read from +** the database file. 
** -** The reader algorithm in the previous paragraph works correctly, but +** To start a read transaction, the reader records the index of the last +** valid frame in the WAL. The reader uses this recorded "mxFrame" value +** for all subsequent read operations. New transactions can be appended +** to the WAL, but as long as the reader uses its original mxFrame value +** and ignores the newly appended content, it will see a consistent snapshot +** of the database from a single point in time. This technique allows +** multiple concurrent readers to view different versions of the database +** content simultaneously. +** +** The reader algorithm in the previous paragraphs works correctly, but ** because frames for page P can appear anywhere within the WAL, the ** reader has to scan the entire WAL looking for page P frames. If the ** WAL is large (multiple megabytes is typical) that scan can be slow, @@ -161,8 +171,7 @@ ** the mapping section. (For index blocks other than the last, K will ** always be exactly HASHTABLE_NPAGE (4096) and for the last index block ** K will be (mxFrame%HASHTABLE_NPAGE).) Unused slots of the hash table -** contain a value greater than K. Note that no hash table slot ever -** contains a zero value. +** contain a value of 0. ** ** To look for page P in the hash table, first compute a hash iKey on ** P as follows: @@ -214,10 +223,33 @@ #include "wal.h" +/* +** Trace output macros +*/ +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +int sqlite3WalTrace = 0; +# define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X +#else +# define WALTRACE(X) +#endif + + +/* +** Indices of various locking bytes. WAL_NREADER is the number +** of available reader locks and should be at least 3. 
+*/ +#define WAL_WRITE_LOCK 0 +#define WAL_ALL_BUT_WRITE 1 +#define WAL_CKPT_LOCK 1 +#define WAL_RECOVER_LOCK 2 +#define WAL_READ_LOCK(I) (3+(I)) +#define WAL_NREADER (SQLITE_SHM_NLOCK-3) + /* Object declarations */ typedef struct WalIndexHdr WalIndexHdr; typedef struct WalIterator WalIterator; +typedef struct WalCkptInfo WalCkptInfo; /* @@ -228,7 +260,8 @@ typedef struct WalIterator WalIterator; */ struct WalIndexHdr { u32 iChange; /* Counter incremented each transaction */ - u16 bigEndCksum; /* True if checksums in WAL are big-endian */ + u8 isInit; /* 1 when initialized */ + u8 bigEndCksum; /* True if checksums in WAL are big-endian */ u16 szPage; /* Database page size in bytes */ u32 mxFrame; /* Index of last valid frame in the WAL */ u32 nPage; /* Size of database in pages */ @@ -237,13 +270,65 @@ struct WalIndexHdr { u32 aCksum[2]; /* Checksum over all prior fields */ }; +/* +** A copy of the following object occurs in the wal-index immediately +** following the second copy of the WalIndexHdr. This object stores +** information used by checkpoint. +** +** nBackfill is the number of frames in the WAL that have been written +** back into the database. (We call the act of moving content from WAL to +** database "backfilling".) The nBackfill number is never greater than +** WalIndexHdr.mxFrame. nBackfill can only be increased by threads +** holding the WAL_CKPT_LOCK lock (which includes a recovery thread). +** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from +** mxFrame back to zero when the WAL is reset. +** +** There is one entry in aReadMark[] for each reader lock. If a reader +** holds read-lock K, then the value in aReadMark[K] is no greater than +** the mxFrame for that reader. aReadMark[0] is a special case. It +** always holds zero. Readers holding WAL_READ_LOCK(0) always ignore +** the entire WAL and read all content directly from the database. 
+** +** The value of aReadMark[K] may only be changed by a thread that +** is holding an exclusive lock on WAL_READ_LOCK(K). Thus, the value of +** aReadMark[K] cannot be changed while a reader is using that mark +** since the reader will be holding a shared lock on WAL_READ_LOCK(K). +** +** The checkpointer may only transfer frames from WAL to database where +** the frame numbers are less than or equal to every aReadMark[] that is +** in use (that is, every aReadMark[j] for which there is a corresponding +** WAL_READ_LOCK(j)). New readers (usually) pick the aReadMark[] with the +** largest value and will increase an unused aReadMark[] to mxFrame if there +** is not already an aReadMark[] equal to mxFrame. The exception to the +** previous sentence is when nBackfill equals mxFrame (meaning that everything +** in the WAL has been backfilled into the database) then new readers +** will choose aReadMark[0] which has value 0 and hence such readers will +** get all their content directly from the database file and ignore +** the WAL. +** +** Writers normally append new frames to the end of the WAL. However, +** if nBackfill equals mxFrame (meaning that all WAL content has been +** written back into the database) and if no readers are using the WAL +** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then +** the writer will first "reset" the WAL back to the beginning and start +** writing new content beginning at frame 1. +** +** We assume that 32-bit loads are atomic and so no locks are needed in +** order to read from any aReadMark[] entries. +*/ +struct WalCkptInfo { + u32 nBackfill; /* Number of WAL frames backfilled into DB */ + u32 aReadMark[WAL_NREADER]; /* Reader marks */ +}; + + /* A block of WALINDEX_LOCK_RESERVED bytes beginning at ** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems ** only support mandatory file-locks, we do not read or write data ** from the region of the file on which locks are applied.
*/ -#define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2) -#define WALINDEX_LOCK_RESERVED 8 +#define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo)) +#define WALINDEX_LOCK_RESERVED 16 #define WALINDEX_HDR_SIZE (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED) /* Size of header before each frame in wal */ @@ -277,22 +362,31 @@ struct WalIndexHdr { ** following object. */ struct Wal { - sqlite3_vfs *pVfs; /* The VFS used to create pFd */ + sqlite3_vfs *pVfs; /* The VFS used to create pDbFd */ sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pWalFd; /* File handle for WAL file */ u32 iCallback; /* Value to pass to log callback (or 0) */ int szWIndex; /* Size of the wal-index that is mapped in mem */ volatile u32 *pWiData; /* Pointer to wal-index content in memory */ - u8 lockState; /* SQLITE_SHM_xxxx constant showing lock state */ - u8 readerType; /* SQLITE_SHM_READ or SQLITE_SHM_READ_FULL */ + u16 szPage; /* Database page size */ + i16 readLock; /* Which read lock is being held. -1 for none */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ - u8 isWindexOpen; /* True if ShmOpen() called on pDbFd */ - WalIndexHdr hdr; /* Wal-index for current snapshot */ + u8 isWIndexOpen; /* True if ShmOpen() called on pDbFd */ + u8 writeLock; /* True if in a write transaction */ + u8 ckptLock; /* True if holding a checkpoint lock */ + WalIndexHdr hdr; /* Wal-index header for current transaction */ char *zWalName; /* Name of WAL file */ - int szPage; /* Database page size */ u32 nCkpt; /* Checkpoint sequence counter in the wal-header */ }; +/* +** Return a pointer to the WalCkptInfo structure in the wal-index. 
+*/ +static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ + assert( pWal->pWiData!=0 ); + return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2]; +} + /* ** This structure is used to implement an iterator that loops through @@ -379,29 +473,6 @@ static void walChecksumBytes( aOut[1] = s2; } -/* -** Attempt to change the lock status. -** -** When changing the lock status to SQLITE_SHM_READ, store the -** type of reader lock (either SQLITE_SHM_READ or SQLITE_SHM_READ_FULL) -** in pWal->readerType. -*/ -static int walSetLock(Wal *pWal, int desiredStatus){ - int rc = SQLITE_OK; /* Return code */ - if( pWal->exclusiveMode || pWal->lockState==desiredStatus ){ - pWal->lockState = desiredStatus; - }else{ - int got = pWal->lockState; - rc = sqlite3OsShmLock(pWal->pDbFd, desiredStatus, &got); - pWal->lockState = got; - if( got==SQLITE_SHM_READ_FULL || got==SQLITE_SHM_READ ){ - pWal->readerType = got; - pWal->lockState = SQLITE_SHM_READ; - } - } - return rc; -} - /* ** Write the header information in pWal->hdr into the wal-index. ** @@ -409,13 +480,15 @@ static int walSetLock(Wal *pWal, int desiredStatus){ */ static void walIndexWriteHdr(Wal *pWal){ WalIndexHdr *aHdr; - walChecksumBytes(1, (u8*)&pWal->hdr, - sizeof(pWal->hdr) - sizeof(pWal->hdr.aCksum), + + assert( pWal->writeLock ); + pWal->hdr.isInit = 1; + walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum), 0, pWal->hdr.aCksum); aHdr = (WalIndexHdr*)pWal->pWiData; - memcpy(&aHdr[1], &pWal->hdr, sizeof(pWal->hdr)); + memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr)); sqlite3OsShmBarrier(pWal->pDbFd); - memcpy(&aHdr[0], &pWal->hdr, sizeof(pWal->hdr)); + memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr)); } /* @@ -520,6 +593,67 @@ static int walDecodeFrame( #define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */ #define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT) +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) +/* +** Names of locks. 
This routine is used to provide debugging output and is not +** a part of an ordinary build. +*/ +static const char *walLockName(int lockIdx){ + if( lockIdx==WAL_WRITE_LOCK ){ + return "WRITE-LOCK"; + }else if( lockIdx==WAL_CKPT_LOCK ){ + return "CKPT-LOCK"; + }else if( lockIdx==WAL_RECOVER_LOCK ){ + return "RECOVER-LOCK"; + }else{ + static char zName[15]; + sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]", + lockIdx-WAL_READ_LOCK(0)); + return zName; + } +} +#endif /*defined(SQLITE_TEST) && defined(SQLITE_DEBUG) */ + + +/* +** Set or release locks on the WAL. Locks are either shared or exclusive. +** A lock cannot be moved directly between shared and exclusive - it must go +** through the unlocked state first. +** +** In locking_mode=EXCLUSIVE, all of these routines become no-ops. +*/ +static int walLockShared(Wal *pWal, int lockIdx){ + int rc; + if( pWal->exclusiveMode ) return SQLITE_OK; + rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, + SQLITE_SHM_LOCK | SQLITE_SHM_SHARED); + WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal, + walLockName(lockIdx), rc ? "failed" : "ok")); + return rc; +} +static void walUnlockShared(Wal *pWal, int lockIdx){ + if( pWal->exclusiveMode ) return; + (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1, + SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED); + WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx))); +} +static int walLockExclusive(Wal *pWal, int lockIdx, int n){ + int rc; + if( pWal->exclusiveMode ) return SQLITE_OK; + rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, + SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE); + WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal, + walLockName(lockIdx), n, rc ?
"failed" : "ok")); + return rc; +} +static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){ + if( pWal->exclusiveMode ) return; + (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n, + SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE); + WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal, + walLockName(lockIdx), n)); +} + /* ** Return the index in the Wal.pWiData array that corresponds to ** frame iFrame. @@ -543,10 +677,10 @@ static int walIndexEntry(u32 iFrame){ } /* -** Return the minimum mapping size in bytes that can be used to read the -** wal-index up to and including frame iFrame. If iFrame is the last frame -** in a block of 256 frames, the returned byte-count includes the space -** required by the 256-byte index block. +** Return the minimum size of the shared-memory, in bytes, that is needed +** to support a wal-index containing frame iFrame. The value returned +** includes the wal-index header and the complete "block" containing iFrame, +** including the hash table segment that follows the block. */ static int walMappingSize(u32 iFrame){ const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ; @@ -600,7 +734,7 @@ static int walIndexMap(Wal *pWal, int reqSize){ static int walIndexRemap(Wal *pWal, int enlargeTo){ int rc; int sz; - assert( pWal->lockState>=SQLITE_SHM_WRITE ); + assert( pWal->writeLock ); rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz); if( rc==SQLITE_OK && sz>pWal->szWIndex ){ walIndexUnmap(pWal); @@ -612,7 +746,8 @@ static int walIndexRemap(Wal *pWal, int enlargeTo){ /* ** Compute a hash on a page number. The resulting hash value must land -** between 0 and (HASHTABLE_NSLOT-1). +** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances +** the hash to the next value in the event of a collision. */ static int walHash(u32 iPage){ assert( iPage>0 ); @@ -675,22 +810,28 @@ static void walHashFind( ** This function is called whenever pWal->hdr.mxFrame is decreased due ** to a rollback or savepoint. 
** -** At most only the very last hash table needs to be updated. Any -** later hash tables will be automatically cleared when pWal->hdr.mxFrame -** advances to the point where those hash tables are actually needed. +** At most only the hash table containing pWal->hdr.mxFrame needs to be +** updated. Any later hash tables will be automatically cleared when +** pWal->hdr.mxFrame advances to the point where those hash tables are +** actually needed. */ static void walCleanupHash(Wal *pWal){ volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table to clear */ volatile u32 *aPgno; /* Unused return from walHashFind() */ u32 iZero; /* frame == (aHash[x]+iZero) */ - int iLimit; /* Zero values greater than this */ + int iLimit = 0; /* Zero values greater than this */ - assert( pWal->lockState==SQLITE_SHM_WRITE ); - walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); - iLimit = pWal->hdr.mxFrame - iZero; - if( iLimit>0 ){ + assert( pWal->writeLock ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE ); + testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 ); + if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){ int nByte; /* Number of bytes to zero in aPgno[] */ int i; /* Used to iterate through aHash[] */ + + walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); + iLimit = pWal->hdr.mxFrame - iZero; + assert( iLimit>0 ); for(i=0; iiLimit ){ aHash[i] = 0; @@ -709,7 +850,7 @@ static void walCleanupHash(Wal *pWal){ /* Verify that the every entry in the mapping region is still reachable ** via the hash table even after the cleanup. */ - { + if( iLimit ){ int i; /* Loop counter */ int iKey; /* Hash key */ for(i=1; i<=iLimit; i++){ @@ -810,19 +951,44 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ /* ** Recover the wal-index by reading the write-ahead log file. -** The caller must hold RECOVER lock on the wal-index file. 
+** +** This routine first tries to establish an exclusive lock on the +** wal-index to prevent other threads/processes from doing anything +** with the WAL or wal-index while recovery is running. The +** WAL_RECOVER_LOCK is also held so that other threads will know +** that this thread is running recovery. If unable to establish +** the necessary locks, this routine returns SQLITE_BUSY. */ static int walIndexRecover(Wal *pWal){ int rc; /* Return Code */ i64 nSize; /* Size of log file */ u32 aFrameCksum[2] = {0, 0}; + int iLock; /* Lock offset to lock for checkpoint */ + int nLock; /* Number of locks to hold */ + + /* Obtain an exclusive lock on all bytes in the locking range not already + ** locked by the caller. The caller is guaranteed to have locked the + ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte. + ** If successful, the same bytes that are locked here are unlocked before + ** this function returns. + */ + assert( pWal->ckptLock==1 || pWal->ckptLock==0 ); + assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 ); + assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE ); + assert( pWal->writeLock ); + iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock; + nLock = SQLITE_SHM_NLOCK - iLock; + rc = walLockExclusive(pWal, iLock, nLock); + if( rc ){ + return rc; + } + WALTRACE(("WAL%p: recovery begin...\n", pWal)); - assert( pWal->lockState>SQLITE_SHM_READ ); memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); rc = sqlite3OsFileSize(pWal->pWalFd, &nSize); if( rc!=SQLITE_OK ){ - return rc; + goto recovery_error; } if( nSize>WAL_HDRSIZE ){ @@ -838,7 +1004,7 @@ static int walIndexRecover(Wal *pWal){ /* Read in the WAL header.
*/ rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0); if( rc!=SQLITE_OK ){ - return rc; + goto recovery_error; } /* If the database page size is not a power of two, or is greater than @@ -867,7 +1033,8 @@ static int walIndexRecover(Wal *pWal){ szFrame = szPage + WAL_FRAME_HDRSIZE; aFrame = (u8 *)sqlite3_malloc(szFrame); if( !aFrame ){ - return SQLITE_NOMEM; + rc = SQLITE_NOMEM; + goto recovery_error; } aData = &aFrame[WAL_FRAME_HDRSIZE]; @@ -907,7 +1074,17 @@ finished: pWal->hdr.aFrameCksum[0] = aFrameCksum[0]; pWal->hdr.aFrameCksum[1] = aFrameCksum[1]; walIndexWriteHdr(pWal); + + /* Zero the checkpoint-header. This is safe because this thread is + ** currently holding locks that exclude all other readers, writers and + ** checkpointers. + */ + memset((void *)walCkptInfo(pWal), 0, sizeof(WalCkptInfo)); } + +recovery_error: + WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok")); + walUnlockExclusive(pWal, iLock, nLock); return rc; } @@ -915,23 +1092,19 @@ finished: ** Close an open wal-index. */ static void walIndexClose(Wal *pWal, int isDelete){ - if( pWal->isWindexOpen ){ - int notUsed; - sqlite3OsShmLock(pWal->pDbFd, SQLITE_SHM_UNLOCK, ¬Used); + if( pWal->isWIndexOpen ){ sqlite3OsShmClose(pWal->pDbFd, isDelete); - pWal->isWindexOpen = 0; + pWal->isWIndexOpen = 0; } } /* -** Open a connection to the log file associated with database zDb. The -** database file does not actually have to exist. zDb is used only to -** figure out the name of the log file to open. If the log file does not -** exist it is created by this call. +** Open a connection to the WAL file associated with database zDbName. +** The database file must already be opened on connection pDbFd. ** ** A SHARED lock should be held on the database file when this function ** is called. The purpose of this SHARED lock is to prevent any other -** client from unlinking the log or wal-index file. If another process +** client from unlinking the WAL or wal-index file. 
If another process ** were to do this just after this client opened one of these files, the ** system would be badly broken. ** @@ -978,6 +1151,7 @@ int sqlite3WalOpen( pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pDbFd = pDbFd; pRet->szWIndex = -1; + pRet->readLock = -1; sqlite3_randomness(8, &pRet->hdr.aSalt); pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; sqlite3_snprintf(nWal, zWal, "%s-wal", zDbName); @@ -985,7 +1159,7 @@ int sqlite3WalOpen( /* Open file handle on the write-ahead log file. */ if( rc==SQLITE_OK ){ - pRet->isWindexOpen = 1; + pRet->isWIndexOpen = 1; flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_MAIN_JOURNAL); rc = sqlite3OsOpen(pVfs, zWal, pRet->pWalFd, flags, &flags); } @@ -996,6 +1170,7 @@ int sqlite3WalOpen( sqlite3_free(pRet); }else{ *ppWal = pRet; + WALTRACE(("WAL%d: opened\n", pRet)); } return rc; } @@ -1133,7 +1308,7 @@ static int walIteratorInit(Wal *pWal, WalIterator **pp){ ** running (or, indeed, while the WalIterator object exists). Hence, ** we can cast off the volatile qualifacation from shared memory */ - assert( pWal->lockState==SQLITE_SHM_CHECKPOINT ); + assert( pWal->ckptLock ); aData = (u32*)pWal->pWiData; /* Allocate space for the WalIterator object */ @@ -1180,8 +1355,37 @@ static void walIteratorFree(WalIterator *p){ sqlite3_free(p); } + /* -** Checkpoint the contents of the log file. +** Copy as much content as we can from the WAL back into the database file +** in response to an sqlite3_wal_checkpoint() request or the equivalent. +** +** The amount of information copies from WAL to database might be limited +** by active readers. This routine will never overwrite a database page +** that a concurrent reader might be using. +** +** All I/O barrier operations (a.k.a fsyncs) occur in this routine when +** SQLite is in WAL-mode in synchronous=NORMAL. 
That means that if +** checkpoints are always run by a background thread or background +** process, foreground threads will never block on a lengthy fsync call. +** +** Fsync is called on the WAL before writing content out of the WAL and +** into the database. This ensures that the new content is persistent +** in the WAL and can be recovered following a power-loss or hard reset. +** +** Fsync is also called on the database file if (and only if) the entire +** WAL content is copied into the database file. This second fsync makes +** it safe to delete the WAL since the new content will persist in the +** database file. +** +** This routine uses and updates the nBackfill field of the wal-index header. +** This is the only routine that will increase the value of nBackfill. +** (A WAL reset or recovery will revert nBackfill to zero, but not increase +** its value.) +** +** The caller must be holding sufficient locks to ensure that no other +** checkpoint is running (in any other thread or process) at the same +** time. */ static int walCheckpoint( Wal *pWal, /* Wal connection */ @@ -1194,51 +1398,88 @@ static int walCheckpoint( WalIterator *pIter = 0; /* Wal iterator context */ u32 iDbpage = 0; /* Next database page to write */ u32 iFrame = 0; /* Wal frame containing data for iDbpage */ + u32 mxSafeFrame; /* Max frame that can be backfilled */ + int i; /* Loop counter */ + volatile WalIndexHdr *pHdr; /* The actual wal-index header in SHM */ + volatile WalCkptInfo *pInfo; /* The checkpoint status information */ /* Allocate the iterator */ rc = walIteratorInit(pWal, &pIter); if( rc!=SQLITE_OK || pWal->hdr.mxFrame==0 ){ - goto out; + walIteratorFree(pIter); + return rc; } + /*** TODO: Move this test out to the caller.
Make it an assert() here ***/ if( pWal->hdr.szPage!=nBuf ){ - rc = SQLITE_CORRUPT_BKPT; - goto out; + walIteratorFree(pIter); + return SQLITE_CORRUPT_BKPT; } - /* Sync the log file to disk */ - if( sync_flags ){ - rc = sqlite3OsSync(pWal->pWalFd, sync_flags); - if( rc!=SQLITE_OK ) goto out; + /* Compute in mxSafeFrame the index of the last frame of the WAL that is + ** safe to write into the database. Frames beyond mxSafeFrame might + ** overwrite database pages that are in use by active readers and thus + ** cannot be backfilled from the WAL. + */ + mxSafeFrame = pWal->hdr.mxFrame; + pHdr = (volatile WalIndexHdr*)pWal->pWiData; + pInfo = (volatile WalCkptInfo*)&pHdr[2]; + assert( pInfo==walCkptInfo(pWal) ); + for(i=1; iaReadMark[i]; + if( y>0 && (mxSafeFrame==0 || mxSafeFrame>=y) ){ + if( y<=pWal->hdr.mxFrame + && walLockExclusive(pWal, WAL_READ_LOCK(i), 1)==SQLITE_OK + ){ + pInfo->aReadMark[i] = 0; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else{ + mxSafeFrame = y-1; + } + } } - /* Iterate through the contents of the log, copying data to the db file. */ - while( 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ - rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, - walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE - ); - if( rc!=SQLITE_OK ) goto out; - rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage); - if( rc!=SQLITE_OK ) goto out; + if( pInfo->nBackfillnBackfill; + + /* Sync the WAL to disk */ + if( sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags); + } + + /* Iterate through the contents of the WAL, copying data to the db file. 
*/ + while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ + if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue; + rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, + walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE + ); + if( rc!=SQLITE_OK ) break; + rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, (iDbpage-1)*szPage); + if( rc!=SQLITE_OK ) break; + } + + /* If work was actually accomplished... */ + if( rc==SQLITE_OK && pInfo->nBackfillnBackfill = mxSafeFrame; + if( mxSafeFrame==pHdr[0].mxFrame && sync_flags ){ + rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage)); + if( rc==SQLITE_OK && sync_flags ){ + rc = sqlite3OsSync(pWal->pDbFd, sync_flags); + } + } + } + + /* Release the reader lock held while backfilling */ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + }else{ + /* Reset the return code so as not to report a checkpoint failure + ** just because active readers prevent any backfill. + */ + rc = SQLITE_OK; } - /* Truncate the database file */ - rc = sqlite3OsTruncate(pWal->pDbFd, ((i64)pWal->hdr.nPage*(i64)szPage)); - if( rc!=SQLITE_OK ) goto out; - - /* Sync the database file. If successful, update the wal-index. 
*/ - if( sync_flags ){ - rc = sqlite3OsSync(pWal->pDbFd, sync_flags); - if( rc!=SQLITE_OK ) goto out; - } - pWal->hdr.mxFrame = 0; - pWal->nCkpt++; - sqlite3Put4byte((u8*)pWal->hdr.aSalt, - 1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt)); - sqlite3_randomness(4, &pWal->hdr.aSalt[1]); - walIndexWriteHdr(pWal); - - out: walIteratorFree(pIter); return rc; } @@ -1266,7 +1507,8 @@ int sqlite3WalClose( */ rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE); if( rc==SQLITE_OK ){ - rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf, 0, 0); + pWal->exclusiveMode = 1; + rc = sqlite3WalCheckpoint(pWal, sync_flags, nBuf, zBuf); if( rc==SQLITE_OK ){ isDelete = 1; } @@ -1278,6 +1520,7 @@ int sqlite3WalClose( if( isDelete ){ sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); } + WALTRACE(("WAL%p: closed\n", pWal)); sqlite3_free(pWal); } return rc; @@ -1290,7 +1533,8 @@ int sqlite3WalClose( ** The wal-index is in shared memory. Another thread or process might ** be writing the header at the same time this procedure is trying to ** read it, which might result in inconsistency. A dirty read is detected -** by verifying a checksum on the header. +** by verifying that both copies of the header are the same and also by +** a checksum on the header. ** ** If and only if the read is consistent and the header is different from ** pWal->hdr, then pWal->hdr is updated to the content of the new header @@ -1311,9 +1555,9 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ } assert( pWal->pWiData ); - /* Read the header. The caller may or may not have an exclusive - ** (WRITE, PENDING, CHECKPOINT or RECOVER) lock on the wal-index - ** file, meaning it is possible that an inconsistent snapshot is read + /* Read the header. This might happen currently with a write to the + ** same area of shared memory on a different CPU in a SMP, + ** meaning it is possible that an inconsistent snapshot is read ** from the file. If this happens, return non-zero. 
** ** There are two copies of the header at the beginning of the wal-index. @@ -1329,7 +1573,7 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ if( memcmp(&h1, &h2, sizeof(h1))!=0 ){ return 1; /* Dirty read */ } - if( h1.szPage==0 ){ + if( h1.isInit==0 ){ return 1; /* Malformed header - probably all zeros */ } walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum); @@ -1367,52 +1611,40 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){ */ static int walIndexReadHdr(Wal *pWal, int *pChanged){ int rc; /* Return code */ - int lockState; /* pWal->lockState before running recovery */ + int badHdr; /* True if a header read failed */ - assert( pWal->lockState>=SQLITE_SHM_READ ); assert( pChanged ); rc = walIndexMap(pWal, walMappingSize(1)); if( rc!=SQLITE_OK ){ return rc; } - /* First attempt to read the wal-index header. This may fail for one - ** of two reasons: (a) the wal-index does not yet exist or has been - ** corrupted and needs to be constructed by running recovery, or (b) - ** the caller is only holding a READ lock and made a dirty read of - ** the wal-index header. - ** - ** A dirty read of the wal-index header occurs if another thread or - ** process happens to be writing to the wal-index header at roughly - ** the same time as this thread is reading it. In this case it is - ** possible that an inconsistent header is read (which is detected - ** using the header checksum mechanism). + /* Try once to read the header straight out. This works most of the + ** time. */ - if( walIndexTryHdr(pWal, pChanged)!=0 ){ + badHdr = walIndexTryHdr(pWal, pChanged); - /* If the first attempt to read the header failed, lock the wal-index - ** file with an exclusive lock and try again. If the header checksum - ** verification fails again, we can be sure that it is not simply a - ** dirty read, but that the wal-index really does need to be - ** reconstructed by running log recovery. 
- ** - ** In the paragraph above, an "exclusive lock" may be any of WRITE, - ** PENDING, CHECKPOINT or RECOVER. If any of these are already held, - ** no locking operations are required. If the caller currently holds - ** a READ lock, then upgrade to a RECOVER lock before re-reading the - ** wal-index header and revert to a READ lock before returning. - */ - lockState = pWal->lockState; - if( lockState>SQLITE_SHM_READ - || SQLITE_OK==(rc = walSetLock(pWal, SQLITE_SHM_RECOVER)) - ){ - if( walIndexTryHdr(pWal, pChanged) ){ - *pChanged = 1; + /* If the first attempt failed, it might have been due to a race + ** with a writer. So get a WRITE lock and try again. + */ + assert( badHdr==0 || pWal->writeLock==0 ); + if( badHdr ){ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( rc==SQLITE_OK ){ + pWal->writeLock = 1; + badHdr = walIndexTryHdr(pWal, pChanged); + if( badHdr ){ + /* If the wal-index header is still malformed even while holding + ** a WRITE lock, it can only mean that the header is corrupted and + ** needs to be reconstructed. So run recovery to do exactly that. + */ rc = walIndexRecover(pWal); + *pChanged = 1; } - if( lockState==SQLITE_SHM_READ ){ - walSetLock(pWal, SQLITE_SHM_READ); - } + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + }else if( rc!=SQLITE_BUSY ){ + return rc; } } @@ -1428,53 +1660,192 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){ } /* -** Take a snapshot of the state of the WAL and wal-index for the current -** instant in time. The current thread will continue to use this snapshot. -** Other threads might containing appending to the WAL and wal-index but -** the extra content appended will be ignored by the current thread. -** -** A snapshot is like a read transaction. -** -** No other threads are allowed to run a checkpoint while this thread is -** holding the snapshot since a checkpoint would remove data out from under -** this thread. 
-** -** If this call obtains a new read-lock and the database contents have been -** modified since the most recent call to WalCloseSnapshot() on this Wal -** connection, then *pChanged is set to 1 before returning. Otherwise, it -** is left unmodified. This is used by the pager layer to determine whether -** or not any cached pages may be safely reused. +** This is the value that walTryBeginRead returns when it needs to +** be retried. */ -int sqlite3WalOpenSnapshot(Wal *pWal, int *pChanged){ - int rc; /* Return code */ +#define WAL_RETRY (-1) - rc = walSetLock(pWal, SQLITE_SHM_READ); - assert( rc!=SQLITE_OK || pWal->lockState==SQLITE_SHM_READ ); +/* +** Attempt to start a read transaction. This might fail due to a race or +** other transient condition. When that happens, it returns WAL_RETRY to +** indicate to the caller that it is safe to retry immediately. +** +** On success return SQLITE_OK. On a permanent failure (such as an +** I/O error or an SQLITE_BUSY because another process is running +** recovery) return a positive error code. +** +** On success, this routine obtains a read lock on +** WAL_READ_LOCK(pWal->readLock). The pWal->readLock integer is +** in the range 0 <= pWal->readLock < WAL_NREADER. If pWal->readLock==(-1) +** that means the Wal does not hold any read lock. The reader must not +** access any database page that is modified by a WAL frame up to and +** including frame number aReadMark[pWal->readLock]. The reader will +** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0. +** Or if pWal->readLock==0, then the reader will ignore the WAL +** completely and get all content directly from the database file. +** When the read transaction is completed, the caller must release the +** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1.
+** +** This routine uses the nBackfill and aReadMark[] fields of the header +** to select a particular WAL_READ_LOCK() that strives to let the +** checkpoint process do as much work as possible. This routine might +** update values of the aReadMark[] array in the header, but if it does +** so it takes care to hold an exclusive lock on the corresponding +** WAL_READ_LOCK() while changing values. +*/ +static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal){ + volatile WalIndexHdr *pHdr; /* Header of the wal-index */ + volatile WalCkptInfo *pInfo; /* Checkpoint information in wal-index */ + u32 mxReadMark; /* Largest aReadMark[] value */ + int mxI; /* Index of largest aReadMark[] value */ + int i; /* Loop counter */ + int rc; /* Return code */ - if( rc==SQLITE_OK ){ + assert( pWal->readLock<0 ); /* Not currently locked */ + + if( !useWal ){ rc = walIndexReadHdr(pWal, pChanged); - if( rc!=SQLITE_OK ){ - /* An error occured while attempting log recovery. */ - sqlite3WalCloseSnapshot(pWal); + if( rc==SQLITE_BUSY ){ + /* If there is not a recovery running in another thread or process + ** then convert BUSY errors to WAL_RETRY. If recovery is known to + ** be running, convert BUSY to BUSY_RECOVERY. There is a race here + ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY + ** would be technically correct. But the race is benign since with + ** WAL_RETRY this routine will be called again and will probably be + ** right on the second iteration. 
+ */ + rc = walLockShared(pWal, WAL_RECOVER_LOCK); + if( rc==SQLITE_OK ){ + walUnlockShared(pWal, WAL_RECOVER_LOCK); + rc = WAL_RETRY; + }else if( rc==SQLITE_BUSY ){ + rc = SQLITE_BUSY_RECOVERY; + } + } + }else{ + rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); + } + if( rc!=SQLITE_OK ){ + return rc; + } + + pHdr = (volatile WalIndexHdr*)pWal->pWiData; + pInfo = (volatile WalCkptInfo*)&pHdr[2]; + assert( pInfo==walCkptInfo(pWal) ); + if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ + /* The WAL has been completely backfilled (or it is empty). + ** and can be safely ignored. + */ + rc = walLockShared(pWal, WAL_READ_LOCK(0)); + if( rc==SQLITE_OK ){ + if( pHdr->mxFrame!=pWal->hdr.mxFrame ){ + walUnlockShared(pWal, WAL_READ_LOCK(0)); + return WAL_RETRY; + } + pWal->readLock = 0; + return SQLITE_OK; + }else if( rc!=SQLITE_BUSY ){ + return rc; } } + /* If we get this far, it means that the reader will want to use + ** the WAL to get at content from recent commits. The job now is + ** to select one of the aReadMark[] entries that is closest to + ** but not exceeding pWal->hdr.mxFrame and lock that entry. + */ + mxReadMark = 0; + mxI = 0; + for(i=1; iaReadMark[i]; + if( mxReadMarkaReadMark[1] = pWal->hdr.mxFrame+1; + walUnlockExclusive(pWal, WAL_READ_LOCK(1), 1); + } + return WAL_RETRY; + }else{ + if( mxReadMark < pWal->hdr.mxFrame ){ + for(i=1; iaReadMark[i] = pWal->hdr.mxFrame+1; + mxI = i; + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + break; + } + } + } + + rc = walLockShared(pWal, WAL_READ_LOCK(mxI)); + if( rc ){ + return rc==SQLITE_BUSY ? WAL_RETRY : rc; + } + if( pInfo->aReadMark[mxI]!=mxReadMark + || pHdr[0].mxFrame!=pWal->hdr.mxFrame + || (sqlite3OsShmBarrier(pWal->pDbFd), pHdr[1].mxFrame!=pWal->hdr.mxFrame) + ){ + walUnlockShared(pWal, WAL_READ_LOCK(mxI)); + return WAL_RETRY; + }else{ + pWal->readLock = mxI; + } + } + return rc; +} + +/* +** Begin a read transaction on the database. 
+** +** This routine used to be called sqlite3WalOpenSnapshot() and with good reason: +** it takes a snapshot of the state of the WAL and wal-index for the current +** instant in time. The current thread will continue to use this snapshot. +** Other threads might append new content to the WAL and wal-index but +** that extra content is ignored by the current thread. +** +** If the database contents have changed since the previous read +** transaction, then *pChanged is set to 1 before returning. The +** Pager layer will use this to know that its cache is stale and +** needs to be flushed. +*/ +int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){ + int rc; /* Return code */ + + do{ + rc = walTryBeginRead(pWal, pChanged, 0); + }while( rc==WAL_RETRY ); walIndexUnmap(pWal); return rc; } /* -** Unlock the current snapshot. +** Finish with a read transaction. All this does is release the +** read-lock. */ -void sqlite3WalCloseSnapshot(Wal *pWal){ - assert( pWal->lockState==SQLITE_SHM_READ - || pWal->lockState==SQLITE_SHM_UNLOCK - ); - walSetLock(pWal, SQLITE_SHM_UNLOCK); +void sqlite3WalEndReadTransaction(Wal *pWal){ + if( pWal->readLock>=0 ){ + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->readLock = -1; + } } /* -** Read a page from the log, if it is present. +** Read a page from the WAL, if it is present in the WAL and if the +** current read transaction is configured to use the WAL. +** +** The *pInWal is set to 1 if the requested page is in the WAL and +** has been loaded. Or *pInWal is set to 0 if the page was not in +** the WAL and needs to be read out of the database.
*/ int sqlite3WalRead( Wal *pWal, /* WAL handle */ @@ -1488,17 +1859,21 @@ int sqlite3WalRead( u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ int iHash; /* Used to loop through N hash tables */ + /* This routine is only called from within a read transaction */ + assert( pWal->readLock>=0 ); + /* If the "last page" field of the wal-index header snapshot is 0, then ** no data will be read from the wal under any circumstances. Return early - ** in this case to avoid the walIndexMap/Unmap overhead. + ** in this case to avoid the walIndexMap/Unmap overhead. Likewise, if + ** pWal->readLock==0, then the WAL is ignored by the reader so + ** return early, as if the WAL were empty. */ - if( iLast==0 ){ + if( iLast==0 || pWal->readLock==0 ){ *pInWal = 0; return SQLITE_OK; } /* Ensure the wal-index is mapped. */ - assert( pWal->lockState==SQLITE_SHM_READ||pWal->lockState==SQLITE_SHM_WRITE ); rc = walIndexMap(pWal, walMappingSize(iLast)); if( rc!=SQLITE_OK ){ return rc; @@ -1607,51 +1982,107 @@ int sqlite3WalRead( ** Set *pPgno to the size of the database file (or zero, if unknown). */ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){ - assert( pWal->lockState==SQLITE_SHM_READ - || pWal->lockState==SQLITE_SHM_WRITE ); + assert( pWal->readLock>=0 ); *pPgno = pWal->hdr.nPage; } -/* -** This function returns SQLITE_OK if the caller may write to the database. -** Otherwise, if the caller is operating on a snapshot that has already -** been overwritten by another writer, SQLITE_BUSY is returned. -*/ -int sqlite3WalWriteLock(Wal *pWal, int op){ - int rc = SQLITE_OK; - if( op ){ - assert( pWal->lockState==SQLITE_SHM_READ ); - rc = walSetLock(pWal, SQLITE_SHM_WRITE); - /* If this connection is not reading the most recent database snapshot, - ** it is not possible to write to the database. In this case release - ** the write locks and return SQLITE_BUSY. 
- */ - if( rc==SQLITE_OK ){ - rc = walIndexMap(pWal, walMappingSize(1)); - assert( pWal->szWIndex>=WALINDEX_HDR_SIZE || rc!=SQLITE_OK ); - if( rc==SQLITE_OK - && memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr)) - ){ - rc = SQLITE_BUSY; - } - walIndexUnmap(pWal); - if( rc!=SQLITE_OK ){ - walSetLock(pWal, SQLITE_SHM_READ); +/* +** This function starts a write transaction on the WAL. +** +** A read transaction must have already been started by a prior call +** to sqlite3WalBeginReadTransaction(). +** +** If another thread or process has written into the database since +** the read transaction was started, then it is not possible for this +** thread to write as doing so would cause a fork. So this routine +** returns SQLITE_BUSY in that case and no write transaction is started. +** +** There can only be a single writer active at a time. +*/ +int sqlite3WalBeginWriteTransaction(Wal *pWal){ + int rc; + volatile WalCkptInfo *pInfo; + + /* Cannot start a write transaction without first holding a read + ** transaction. */ + assert( pWal->readLock>=0 ); + + /* Only one writer allowed at a time. Get the write lock. Return + ** SQLITE_BUSY if unable. + */ + rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1); + if( rc ){ + return rc; + } + pWal->writeLock = 1; + + /* If another connection has written to the database file since the + ** time the read transaction on this connection was started, then + ** the write is disallowed. 
+ */ + rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); + if( rc ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + return rc; + } + if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + walIndexUnmap(pWal); + return SQLITE_BUSY; + } + + pInfo = walCkptInfo(pWal); + if( pWal->readLock==0 ){ + assert( pInfo->nBackfill==pWal->hdr.mxFrame ); + if( pInfo->nBackfill>0 ){ + rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); + if( rc==SQLITE_OK ){ + /* If all readers are using WAL_READ_LOCK(0) (in other words if no + ** readers are currently using the WAL) */ + pWal->nCkpt++; + pWal->hdr.mxFrame = 0; + sqlite3Put4byte((u8*)pWal->hdr.aSalt, + 1 + sqlite3Get4byte((u8*)pWal->hdr.aSalt)); + sqlite3_randomness(4, &pWal->hdr.aSalt[1]); + walIndexWriteHdr(pWal); + pInfo->nBackfill = 0; + memset((void*)&pInfo->aReadMark[1], 0, + sizeof(pInfo->aReadMark)-sizeof(u32)); + rc = sqlite3OsTruncate(pWal->pDbFd, + ((i64)pWal->hdr.nPage*(i64)pWal->szPage)); + walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); } } - }else if( pWal->lockState==SQLITE_SHM_WRITE ){ - rc = walSetLock(pWal, SQLITE_SHM_READ); + walUnlockShared(pWal, WAL_READ_LOCK(0)); + pWal->readLock = -1; + do{ + int notUsed; + rc = walTryBeginRead(pWal, &notUsed, 1); + }while( rc==WAL_RETRY ); } + walIndexUnmap(pWal); return rc; } +/* +** End a write transaction. The commit has already been done. This +** routine merely releases the lock. +*/ +int sqlite3WalEndWriteTransaction(Wal *pWal){ + walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); + pWal->writeLock = 0; + return SQLITE_OK; +} + /* ** If any data has been written (but not committed) to the log file, this ** function moves the write-pointer back to the start of the transaction. ** ** Additionally, the callback function is invoked for each frame written -** to the log since the start of the transaction.
If the callback returns +** to the WAL since the start of the transaction. If the callback returns ** other than SQLITE_OK, it is not invoked again and the error code is ** returned to the caller. ** @@ -1660,7 +2091,7 @@ int sqlite3WalWriteLock(Wal *pWal, int op){ */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ int rc = SQLITE_OK; - if( pWal->lockState==SQLITE_SHM_WRITE ){ + if( pWal->writeLock ){ int unused; Pgno iMax = pWal->hdr.mxFrame; Pgno iFrame; @@ -1672,7 +2103,7 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ } if( rc==SQLITE_OK ){ for(iFrame=pWal->hdr.mxFrame+1; rc==SQLITE_OK && iFrame<=iMax; iFrame++){ - assert( pWal->lockState==SQLITE_SHM_WRITE ); + assert( pWal->writeLock ); rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); } walCleanupHash(pWal); @@ -1689,7 +2120,7 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){ ** point in the event of a savepoint rollback (via WalSavepointUndo()). */ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ - assert( pWal->lockState==SQLITE_SHM_WRITE ); + assert( pWal->writeLock ); aWalData[0] = pWal->hdr.mxFrame; aWalData[1] = pWal->hdr.aFrameCksum[0]; aWalData[2] = pWal->hdr.aFrameCksum[1]; @@ -1703,7 +2134,7 @@ void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){ */ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){ int rc = SQLITE_OK; - assert( pWal->lockState==SQLITE_SHM_WRITE ); + assert( pWal->writeLock ); assert( aWalData[0]<=pWal->hdr.mxFrame ); if( aWalData[0]<pWal->hdr.mxFrame ){ @@ -1739,9 +2170,16 @@ int sqlite3WalFrames( int nLast = 0; /* Number of extra copies of last page */ assert( pList ); - assert( pWal->lockState==SQLITE_SHM_WRITE ); + assert( pWal->writeLock ); assert( pWal->pWiData==0 ); +#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) + { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} + WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n", + pWal, cnt, pWal->hdr.mxFrame, isCommit ?
"Commit" : "Spill")); + } +#endif + /* If this is the first frame written into the log, write the WAL ** header to the start of the WAL file. See comments at the top of ** this source file for a description of the WAL header format. @@ -1757,6 +2195,7 @@ int sqlite3WalFrames( sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt); memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8); rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0); + WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok")); if( rc!=SQLITE_OK ){ return rc; } @@ -1848,48 +2287,38 @@ int sqlite3WalFrames( } walIndexUnmap(pWal); + WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); return rc; } /* -** Checkpoint the database: +** This routine is called to implement sqlite3_wal_checkpoint() and +** related interfaces. ** -** 1. Acquire a CHECKPOINT lock -** 2. Copy the contents of the log into the database file. -** 3. Zero the wal-index header (so new readers will ignore the log). -** 4. Drop the CHECKPOINT lock. +** Obtain a CHECKPOINT lock and then backfill as much information as +** we can from WAL into the database. */ int sqlite3WalCheckpoint( Wal *pWal, /* Wal connection */ int sync_flags, /* Flags to sync db file with (or 0) */ int nBuf, /* Size of temporary buffer */ - u8 *zBuf, /* Temporary buffer to use */ - int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ - void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ + u8 *zBuf /* Temporary buffer to use */ ){ int rc; /* Return code */ int isChanged = 0; /* True if a new wal-index header is loaded */ assert( pWal->pWiData==0 ); + assert( pWal->ckptLock==0 ); - /* Get the CHECKPOINT lock. - ** - ** Normally, the connection will be in UNLOCK state at this point. But - ** if the connection is in exclusive-mode it may still be in READ state - ** even though the upper layer has no active read-transaction (because - ** WalCloseSnapshot() is not called in exclusive mode). 
The state will - ** be set to UNLOCK when this function returns. This is Ok. - */ - assert( (pWal->lockState==SQLITE_SHM_UNLOCK) - || (pWal->lockState==SQLITE_SHM_READ) ); - walSetLock(pWal, SQLITE_SHM_UNLOCK); - do { - rc = walSetLock(pWal, SQLITE_SHM_CHECKPOINT); - }while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) ); - if( rc!=SQLITE_OK ){ - walSetLock(pWal, SQLITE_SHM_UNLOCK); + WALTRACE(("WAL%p: checkpoint begins\n", pWal)); + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); + if( rc ){ + /* Usually this is SQLITE_BUSY meaning that another thread or process + ** is already running a checkpoint, or maybe a recovery. But it might + ** also be SQLITE_IOERR. */ return rc; } + pWal->ckptLock = 1; /* Copy data from the log to the database file. */ rc = walIndexReadHdr(pWal, &isChanged); @@ -1908,7 +2337,9 @@ int sqlite3WalCheckpoint( /* Release the locks. */ walIndexUnmap(pWal); - walSetLock(pWal, SQLITE_SHM_UNLOCK); + walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); + pWal->ckptLock = 0; + WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); return rc; } @@ -1927,33 +2358,54 @@ int sqlite3WalCallback(Wal *pWal){ } /* -** This function is called to set or query the exclusive-mode flag -** associated with the WAL connection passed as the first argument. The -** exclusive-mode flag should be set to indicate that the caller is -** holding an EXCLUSIVE lock on the database file (it does this in -** locking_mode=exclusive mode). If the EXCLUSIVE lock is to be dropped, -** the flag set by this function should be cleared before doing so. +** This function is called to change the WAL subsystem into or out +** of locking_mode=EXCLUSIVE. ** -** The value of the exclusive-mode flag may only be modified when -** the WAL connection is in READ state. +** If op is zero, then attempt to change from locking_mode=EXCLUSIVE +** into locking_mode=NORMAL. This means that we must acquire a lock +** on the pWal->readLock byte. 
If the WAL is already in locking_mode=NORMAL +** or if the acquisition of the lock fails, then return 0. If the +** transition out of exclusive-mode is successful, return 1. This +** operation must occur while the pager is still holding the exclusive +** lock on the main database file. ** -** When the flag is set, this module does not call the VFS xShmLock() -** method to obtain any locks on the wal-index (as it assumes it -** has exclusive access to the wal and wal-index files anyhow). It -** continues to hold (and does not drop) the existing READ lock on -** the wal-index. +** If op is one, then change from locking_mode=NORMAL into +** locking_mode=EXCLUSIVE. This means that the pWal->readLock must +** be released. Return 1 if the transition is made and 0 if the +** WAL is already in exclusive-locking mode - meaning that this +** routine is a no-op. The pager must already hold the exclusive lock +** on the main database file before invoking this operation. ** -** To set or clear the flag, the "op" parameter is passed 1 or 0, -** respectively. To query the flag, pass -1. In all cases, the value -** returned is the value of the exclusive-mode flag (after its value -** has been modified, if applicable). +** If op is negative, then do a dry-run of the op==1 case but do +** not actually change anything. The pager uses this to see if it +** should acquire the database exclusive lock prior to invoking +** the op==1 case. */ int sqlite3WalExclusiveMode(Wal *pWal, int op){ - if( op>=0 ){ - assert( pWal->lockState==SQLITE_SHM_READ ); - pWal->exclusiveMode = (u8)op; + int rc; + assert( pWal->writeLock==0 && pWal->readLock>=0 ); + if( op==0 ){ + if( pWal->exclusiveMode ){ + pWal->exclusiveMode = 0; + if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){ + pWal->exclusiveMode = 1; + } + rc = pWal->exclusiveMode==0; + }else{ + /* No changes. Either already in locking_mode=NORMAL or else the + ** acquisition of the read-lock failed. 
The pager must continue to + ** hold the database exclusive lock. */ + rc = 0; + } + }else if( op>0 ){ + assert( pWal->exclusiveMode==0 ); + walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock)); + pWal->exclusiveMode = 1; + rc = 1; + }else{ + rc = pWal->exclusiveMode==0; } - return pWal->exclusiveMode; + return rc; } #endif /* #ifndef SQLITE_OMIT_WAL */ diff --git a/src/wal.h b/src/wal.h index bf40c3bd9f..32aade1d0d 100644 --- a/src/wal.h +++ b/src/wal.h @@ -20,19 +20,20 @@ #include "sqliteInt.h" #ifdef SQLITE_OMIT_WAL -# define sqlite3WalOpen(x,y,z) 0 -# define sqlite3WalClose(w,x,y,z) 0 -# define sqlite3WalOpenSnapshot(y,z) 0 -# define sqlite3WalCloseSnapshot(z) -# define sqlite3WalRead(v,w,x,y,z) 0 +# define sqlite3WalOpen(x,y,z) 0 +# define sqlite3WalClose(w,x,y,z) 0 +# define sqlite3WalBeginReadTransaction(y,z) 0 +# define sqlite3WalEndReadTransaction(z) +# define sqlite3WalRead(v,w,x,y,z) 0 # define sqlite3WalDbsize(y,z) -# define sqlite3WalWriteLock(y,z) 0 -# define sqlite3WalUndo(x,y,z) 0 +# define sqlite3WalBeginWriteTransaction(y) 0 +# define sqlite3WalEndWRiteTransaction(x) 0 +# define sqlite3WalUndo(x,y,z) 0 # define sqlite3WalSavepoint(y,z) -# define sqlite3WalSavepointUndo(y,z) 0 -# define sqlite3WalFrames(u,v,w,x,y,z) 0 -# define sqlite3WalCheckpoint(u,v,w,x,y,z) 0 -# define sqlite3WalCallback(z) 0 +# define sqlite3WalSavepointUndo(y,z) 0 +# define sqlite3WalFrames(u,v,w,x,y,z) 0 +# define sqlite3WalCheckpoint(u,v,w,x) 0 +# define sqlite3WalCallback(z) 0 #else #define WAL_SAVEPOINT_NDATA 3 @@ -53,8 +54,8 @@ int sqlite3WalClose(Wal *pWal, int sync_flags, int, u8 *); ** write to or checkpoint the WAL. sqlite3WalCloseSnapshot() closes the ** transaction and releases the lock. */ -int sqlite3WalOpenSnapshot(Wal *pWal, int *); -void sqlite3WalCloseSnapshot(Wal *pWal); +int sqlite3WalBeginReadTransaction(Wal *pWal, int *); +void sqlite3WalEndReadTransaction(Wal *pWal); /* Read a page from the write-ahead log, if it is present. 
*/ int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut); @@ -64,7 +65,8 @@ int sqlite3WalRead(Wal *pWal, Pgno pgno, int *pInWal, int nOut, u8 *pOut); void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno); /* Obtain or release the WRITER lock. */ -int sqlite3WalWriteLock(Wal *pWal, int op); +int sqlite3WalBeginWriteTransaction(Wal *pWal); +int sqlite3WalEndWriteTransaction(Wal *pWal); /* Undo any frames written (but not committed) to the log */ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx); @@ -85,9 +87,7 @@ int sqlite3WalCheckpoint( Wal *pWal, /* Write-ahead log connection */ int sync_flags, /* Flags to sync db file with (or 0) */ int nBuf, /* Size of buffer nBuf */ - u8 *zBuf, /* Temporary buffer to use */ - int (*xBusyHandler)(void *), /* Pointer to busy-handler function */ - void *pBusyHandlerArg /* Argument to pass to xBusyHandler */ + u8 *zBuf /* Temporary buffer to use */ ); /* Return the value to pass to a sqlite3_wal_hook callback, the diff --git a/test/filectrl.test b/test/filectrl.test index fe89a62635..67e81830c4 100644 --- a/test/filectrl.test +++ b/test/filectrl.test @@ -34,7 +34,7 @@ do_test filectrl-1.4 { do_test filectrl-1.5 { db close sqlite3 db test_control_lockproxy.db - file_control_lockproxy_test db + file_control_lockproxy_test db [pwd] } {} db close file delete -force .test_control_lockproxy.db-conch test.proxy diff --git a/test/lock_common.tcl b/test/lock_common.tcl index b2e4184cc4..0683bbd389 100644 --- a/test/lock_common.tcl +++ b/test/lock_common.tcl @@ -47,7 +47,13 @@ proc testfixture {chan cmd} { } proc testfixture_nb_cb {varname chan} { - set line [gets $chan] + if {[eof $chan]} { + append ::tfnb($chan) "ERROR: Child process hung up" + set line "OVER" + } else { + set line [gets $chan] + } + if { $line == "OVER" } { set $varname $::tfnb($chan) unset ::tfnb($chan) diff --git a/test/wal.test b/test/wal.test index fbe0bd3ee7..1c5f556309 100644 --- a/test/wal.test +++ b/test/wal.test @@ -571,70 
+571,49 @@ foreach code [list { } {1 2 3 4 5 6 7 8 9 10} do_test wal-10.$tn.12 { catchsql { PRAGMA wal_checkpoint } - } {1 {database is locked}} + } {0 {}} ;# Reader no longer block checkpoints do_test wal-10.$tn.13 { execsql { INSERT INTO t1 VALUES(11, 12) } sql2 {SELECT * FROM t1} } {1 2 3 4 5 6 7 8 9 10} - # Connection [db2] is holding a lock on a snapshot, preventing [db] from - # checkpointing the database. Add a busy-handler to [db]. If [db2] completes - # its transaction from within the busy-handler, [db] is able to complete - # the checkpoint operation. + # Writers do not block checkpoints any more either. # - proc busyhandler x { - if {$x==4} { sql2 COMMIT } - if {$x<5} { return 0 } - return 1 - } - db busy busyhandler do_test wal-10.$tn.14 { - execsql { PRAGMA wal_checkpoint } - } {} + catchsql { PRAGMA wal_checkpoint } + } {0 {}} - # Similar to the test above. Except this time, a new read transaction is - # started (db3) while the checkpointer is waiting for an old one (db2) to - # finish. The checkpointer can finish, but any subsequent write operations - # must wait until after db3 has closed the read transaction, as db3 is a - # "region D" writer. + # The following series of test cases used to verify another blocking + # case in WAL - a case which no longer blocks. 
# - db busy {} do_test wal-10.$tn.15 { - sql2 { BEGIN; SELECT * FROM t1; } + sql2 { COMMIT; BEGIN; SELECT * FROM t1; } } {1 2 3 4 5 6 7 8 9 10 11 12} do_test wal-10.$tn.16 { catchsql { PRAGMA wal_checkpoint } - } {1 {database is locked}} - proc busyhandler x { - if {$x==3} { sql3 { BEGIN; SELECT * FROM t1 } } - if {$x==4} { sql2 COMMIT } - if {$x<5} { return 0 } - return 1 - } - db busy busyhandler + } {0 {}} do_test wal-10.$tn.17 { execsql { PRAGMA wal_checkpoint } } {} do_test wal-10.$tn.18 { - sql3 { SELECT * FROM t1 } + sql3 { BEGIN; SELECT * FROM t1 } } {1 2 3 4 5 6 7 8 9 10 11 12} do_test wal-10.$tn.19 { catchsql { INSERT INTO t1 VALUES(13, 14) } - } {1 {database is locked}} + } {0 {}} do_test wal-10.$tn.20 { execsql { SELECT * FROM t1 } - } {1 2 3 4 5 6 7 8 9 10 11 12} + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} do_test wal-10.$tn.21 { sql3 COMMIT + sql2 COMMIT } {} do_test wal-10.$tn.22 { - execsql { INSERT INTO t1 VALUES(13, 14) } execsql { SELECT * FROM t1 } } {1 2 3 4 5 6 7 8 9 10 11 12 13 14} - # Set [db3] up as a "region D" reader again. Then upgrade it to a writer - # and back down to a reader. Then, check that a checkpoint is not possible - # (as [db3] still has a snapshot locked). + # Another series of tests that used to demonstrate blocking behavior + # but which now work. 
# do_test wal-10.$tn.23 { execsql { PRAGMA wal_checkpoint } @@ -647,23 +626,21 @@ foreach code [list { } {} do_test wal-10.$tn.26 { catchsql { INSERT INTO t1 VALUES(15, 16) } - } {1 {database is locked}} + } {0 {}} do_test wal-10.$tn.27 { - sql3 { INSERT INTO t1 VALUES(15, 16) } + sql3 { INSERT INTO t1 VALUES(17, 18) } } {} do_test wal-10.$tn.28 { code3 { set ::STMT [sqlite3_prepare db3 "SELECT * FROM t1" -1 TAIL] sqlite3_step $::STMT } - sql3 COMMIT execsql { SELECT * FROM t1 } - } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16} - db busy {} + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} do_test wal-10.$tn.29 { - execsql { INSERT INTO t1 VALUES(17, 18) } + execsql { INSERT INTO t1 VALUES(19, 20) } catchsql { PRAGMA wal_checkpoint } - } {1 {database is locked}} + } {0 {}} do_test wal-10.$tn.30 { code3 { sqlite3_finalize $::STMT } execsql { PRAGMA wal_checkpoint } @@ -674,20 +651,21 @@ foreach code [list { # Test that this bug has been fixed. # do_test wal-10.$tn.31 { + sql2 COMMIT execsql { BEGIN ; SELECT * FROM t1 } - sql2 { INSERT INTO t1 VALUES(19, 20) } - catchsql { INSERT INTO t1 VALUES(21, 22) } + sql2 { INSERT INTO t1 VALUES(21, 22) } + catchsql { INSERT INTO t1 VALUES(23, 24) } } {1 {database is locked}} do_test wal-10.$tn.32 { # This statement would fail when the bug was present. - sql2 { INSERT INTO t1 VALUES(21, 22) } + sql2 { INSERT INTO t1 VALUES(23, 24) } } {} do_test wal-10.$tn.33 { execsql { SELECT * FROM t1 ; COMMIT } - } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18} + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20} do_test wal-10.$tn.34 { execsql { SELECT * FROM t1 } - } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22} + } {1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24} # Test that if a checkpointer cannot obtain the required locks, it # releases all locks before returning a busy error. 
@@ -703,11 +681,9 @@ foreach code [list { SELECT * FROM t1; } } {a b c d} - proc busyhandler x { return 1 } - db busy busyhandler do_test wal-10.$tn.36 { catchsql { PRAGMA wal_checkpoint } - } {1 {database is locked}} + } {0 {}} do_test wal-10.$tn.36 { sql3 { INSERT INTO t1 VALUES('e', 'f') } sql2 { SELECT * FROM t1 } @@ -1059,8 +1035,9 @@ do_test wal-15.3.3 { sqlite3_errmsg db } {database table is locked} -# Also test that an error is returned if the db cannot be checkpointed -# because of locks held by another connection. +# Earlier versions returned an error if the db could not be +# checkpointed because of locks held by another connection. Check that +# this is no longer the case. # sqlite3 db2 test.db do_test wal-15.4.1 { @@ -1072,10 +1049,10 @@ do_test wal-15.4.1 { do_test wal-15.4.2 { execsql { COMMIT } sqlite3_wal_checkpoint db -} {SQLITE_BUSY} +} {SQLITE_OK} do_test wal-15.4.3 { sqlite3_errmsg db -} {database is locked} +} {not an error} # After [db2] drops its lock, [db] may checkpoint the db.
# diff --git a/test/wal2.test b/test/wal2.test index 4fe9165f81..82ee8c4c29 100644 --- a/test/wal2.test +++ b/test/wal2.test @@ -21,7 +21,7 @@ ifcapable !wal {finish_test ; return } proc set_tvfs_hdr {file args} { # Set $nHdr to the number of bytes in the wal-index header: - set nHdr 80 + set nHdr 40 set nInt [expr {$nHdr/4}] if {[llength $args]>1} { @@ -29,9 +29,10 @@ proc set_tvfs_hdr {file args} { } set blob [tvfs shm $file] + if {[llength $args]} { set ia [lindex $args 0] - set tail [string range $blob [expr $nHdr*2] end] + binary scan $blob a[expr $nHdr*2]a* dummy tail set blob [binary format i${nInt}i${nInt}a* $ia $ia $tail] tvfs shm $file $blob } @@ -92,19 +93,28 @@ do_test wal2-1.1 { execsql { SELECT count(a), sum(a) FROM t1 } db2 } {4 10} -foreach {tn iInsert res wal_index_hdr_mod wal_locks} { - 2 5 {5 15} 0 {READ RECOVER READ UNLOCK} - 3 6 {6 21} 1 {READ RECOVER READ UNLOCK} - 4 7 {7 28} 2 {READ RECOVER READ UNLOCK} - 5 8 {8 36} 3 {READ RECOVER READ UNLOCK} - 6 9 {9 45} 4 {READ RECOVER READ UNLOCK} - 7 10 {10 55} 5 {READ RECOVER READ UNLOCK} - 8 11 {11 66} 6 {READ RECOVER READ UNLOCK} - 9 12 {12 78} 7 {READ RECOVER READ UNLOCK} - 10 13 {13 91} 8 {READ RECOVER READ UNLOCK} - 11 14 {14 105} 9 {READ RECOVER READ UNLOCK} - 12 15 {15 120} -1 {READ UNLOCK} -} { +set RECOVER [list \ + {0 1 lock exclusive} {1 7 lock exclusive} \ + {1 7 unlock exclusive} {0 1 unlock exclusive} \ +] +set READ [list \ + {4 1 lock exclusive} {4 1 unlock exclusive} \ + {4 1 lock shared} {4 1 unlock shared} \ +] + +foreach {tn iInsert res wal_index_hdr_mod wal_locks} " + 2 5 {5 15} 0 {$RECOVER $READ} + 3 6 {6 21} 1 {$RECOVER $READ} + 4 7 {7 28} 2 {$RECOVER $READ} + 5 8 {8 36} 3 {$RECOVER $READ} + 6 9 {9 45} 4 {$RECOVER $READ} + 7 10 {10 55} 5 {$RECOVER $READ} + 8 11 {11 66} 6 {$RECOVER $READ} + 9 12 {12 78} 7 {$RECOVER $READ} + 10 13 {13 91} 8 {$RECOVER $READ} + 11 14 {14 105} 9 {$RECOVER $READ} + 12 15 {15 120} -1 {$READ} +" { do_test wal2-1.$tn.1 { execsql { INSERT INTO t1 
VALUES($iInsert) } @@ -119,7 +129,6 @@ foreach {tn iInsert res wal_index_hdr_mod wal_locks} { incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1 } } - if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] } return SQLITE_OK } @@ -150,6 +159,11 @@ file delete -force test.db test.db-wal test.db-journal # to run recovery. This time, it sees an up-to-date snapshot of the # database file. # +set WRITER [list 0 1 lock exclusive] +set LOCKS [list \ + {0 1 lock exclusive} {0 1 unlock exclusive} \ + {4 1 lock shared} {4 1 unlock shared} \ +] do_test wal2-2.0 { testvfs tvfs tvfs_cb @@ -206,7 +220,7 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} { if {$method == "xShmLock"} { set lock [lindex $args 2] lappend ::locks $lock - if {$lock == "RECOVER"} { + if {$lock == $::WRITER} { set_tvfs_hdr $::shm_file $::oldhdr } } @@ -218,7 +232,7 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} { do_test wal2-2.$tn.3 { set ::locks - } {READ RECOVER READ UNLOCK} + } $LOCKS do_test wal2-2.$tn.4 { set ::locks [list] @@ -245,6 +259,8 @@ db2 close tvfs delete file delete -force test.db test.db-wal test.db-journal + +if 0 { #------------------------------------------------------------------------- # This test case - wal2-3.* - tests the response of the library to an # SQLITE_BUSY when attempting to obtain a READ or RECOVER lock. @@ -315,6 +331,8 @@ db close tvfs delete file delete -force test.db test.db-wal test.db-journal +} + #------------------------------------------------------------------------- # Test that a database connection using a VFS that does not support the # xShmXXX interfaces cannot open a WAL database. @@ -349,6 +367,19 @@ tvfs delete # Test that if a database connection is forced to run recovery before it # can perform a checkpoint, it does not transition into RECOVER state. # +# UPDATE: This has now changed. 
When running a checkpoint, if recovery is +# required the client grabs all exclusive locks (just as it would for a +# recovery performed as a pre-cursor to a normal database transaction). +# +set expected_locks [list] +lappend expected_locks {1 1 lock exclusive} ;# Lock checkpoint +lappend expected_locks {0 1 lock exclusive} ;# Lock writer +lappend expected_locks {2 6 lock exclusive} ;# Lock recovery & all aReadMark[] +lappend expected_locks {2 6 unlock exclusive} ;# Unlock recovery & aReadMark[] +lappend expected_locks {0 1 unlock exclusive} ;# Unlock writer +lappend expected_locks {3 1 lock exclusive} ;# Lock aReadMark[0] +lappend expected_locks {3 1 unlock exclusive} ;# Unlock aReadMark[0] +lappend expected_locks {1 1 unlock exclusive} ;# Unlock checkpoint do_test wal2-5.1 { proc tvfs_cb {method args} { set ::shm_file [lindex $args 0] @@ -370,7 +401,7 @@ do_test wal2-5.1 { set ::locks [list] execsql { PRAGMA wal_checkpoint } set ::locks -} {CHECKPOINT UNLOCK} +} $expected_locks db close tvfs delete @@ -535,6 +566,11 @@ do_test wal2-6.3.7 { } {main exclusive temp closed} db close + +# This test - wal2-6.4.* - uses a single database connection and the +# [testvfs] instrumentation to test that xShmLock() is being called +# as expected when a WAL database is used with locking_mode=exclusive. 
+# do_test wal2-6.4.1 { file delete -force test.db test.db-wal test.db-journal proc tvfs_cb {method args} { @@ -544,45 +580,114 @@ do_test wal2-6.4.1 { } testvfs tvfs tvfs_cb sqlite3 db test.db -vfs tvfs +} {} - execsql { +set RECOVERY { + {0 1 lock exclusive} {1 7 lock exclusive} + {1 7 unlock exclusive} {0 1 unlock exclusive} +} +set READMARK0_READ { + {3 1 lock shared} {3 1 unlock shared} +} +set READMARK0_WRITE { + {3 1 lock shared} + {0 1 lock exclusive} {3 1 unlock shared} + {4 1 lock exclusive} {4 1 unlock exclusive} {4 1 lock shared} + {0 1 unlock exclusive} {4 1 unlock shared} +} +set READMARK1_SET { + {4 1 lock exclusive} {4 1 unlock exclusive} +} +set READMARK1_READ { + {4 1 lock shared} {4 1 unlock shared} +} + +foreach {tn sql res expected_locks} { + 2 { PRAGMA journal_mode = WAL; - CREATE TABLE t1(x); - INSERT INTO t1 VALUES('Leonard'); - INSERT INTO t1 VALUES('Arthur'); + BEGIN; + CREATE TABLE t1(x); + INSERT INTO t1 VALUES('Leonard'); + INSERT INTO t1 VALUES('Arthur'); + COMMIT; + } {wal} { + $RECOVERY + $READMARK0_READ + $READMARK0_WRITE } - set ::locks [list] - execsql { PRAGMA locking_mode = exclusive } - set ::locks -} {} -do_test wal2-6.4.2 { - execsql { SELECT * FROM t1 } -} {Leonard Arthur} -do_test wal2-6.4.3 { - set ::locks -} {READ} -do_test wal2-6.4.4 { - execsql { + 3 { + # This test should do the READMARK1_SET locking to populate the + # aReadMark[1] slot with the current mxFrame value. Followed by + # READMARK1_READ to read the database. + # + SELECT * FROM t1 + } {Leonard Arthur} { + $READMARK1_SET + $READMARK1_READ + } + + 4 { + # aReadMark[1] is already set to mxFrame. So just READMARK1_READ + # this time, not READMARK1_SET. 
+ # + SELECT * FROM t1 ORDER BY x + } {Arthur Leonard} { + $READMARK1_READ + } + + 5 { + PRAGMA locking_mode = exclusive + } {exclusive} { } + + 6 { INSERT INTO t1 VALUES('Julius Henry'); SELECT * FROM t1; + } {Leonard Arthur {Julius Henry}} { + $READMARK1_READ } -} {Leonard Arthur {Julius Henry}} -do_test wal2-6.4.5 { - set ::locks -} {READ} -do_test wal2-6.4.6 { - execsql { - PRAGMA locking_mode = NORMAL; - DELETE FROM t1; + + 7 { + INSERT INTO t1 VALUES('Karl'); + SELECT * FROM t1; + } {Leonard Arthur {Julius Henry} Karl} { } + + 8 { + PRAGMA locking_mode = normal + } {normal} { } + + 9 { + SELECT * FROM t1 ORDER BY x + } {Arthur {Julius Henry} Karl Leonard} { } + + 10 { + DELETE FROM t1 + } {} { + $READMARK1_READ } - set ::locks -} {READ UNLOCK} -do_test wal2-6.4.7 { + + 11 { + SELECT * FROM t1 + } {} { + $READMARK1_SET + $READMARK1_READ + } +} { + + set L [list] + foreach el [subst $expected_locks] { lappend L $el } + + set S "" + foreach sq [split $sql "\n"] { + set sq [string trim $sq] + if {[string match {#*} $sq]==0} {append S "$sq\n"} + } + set ::locks [list] - execsql { INSERT INTO t1 VALUES('Karl') } - set ::locks -} {READ WRITE READ UNLOCK} + do_test wal2-6.4.$tn.1 { execsql $S } $res + do_test wal2-6.4.$tn.2 { set ::locks } $L +} + db close tvfs delete diff --git a/test/walthread.test b/test/walthread.test index 1527cadb1f..9817c0e0f5 100644 --- a/test/walthread.test +++ b/test/walthread.test @@ -284,6 +284,12 @@ do_thread_test2 walthread-1 -seconds $seconds(walthread-1) -init { } } + # Turn off auto-checkpoint. Otherwise, an auto-checkpoint run by a + # writer may cause the dedicated checkpoint thread to return an + # SQLITE_BUSY error. 
+ # + db eval { PRAGMA wal_autocheckpoint = 0 } + set nRun 0 while {[tt_continue]} { read_transaction @@ -389,7 +395,10 @@ do_thread_test walthread-3 -seconds $seconds(walthread-3) -init { set nextwrite $E(pid) proc wal_hook {zDb nEntry} { - if {$nEntry>10} {db eval {PRAGMA wal_checkpoint}} + if {$nEntry>10} { + set rc [catch { db eval {PRAGMA wal_checkpoint} } msg] + if {$rc && $msg != "database is locked"} { error $msg } + } return 0 } db wal_hook wal_hook