Add experimental locking scheme.

FossilOrigin-Name: 3f958e87c33d667d299b03ffdef58db5dc6363f4
This commit is contained in:
dan 2010-04-13 19:27:31 +00:00
parent 622194c0d2
commit 64d039e512
5 changed files with 248 additions and 67 deletions

View File

@ -1,8 +1,5 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Fix\san\suninitialized\svariable\sin\sreadDbPage\sof\spager.c.
D 2010-04-13T15:30:53
C Add\sexperimental\slocking\sscheme.
D 2010-04-13T19:27:31
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in 4f2f967b7e58a35bb74fb7ec8ae90e0f4ca7868b
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -134,8 +131,8 @@ F src/journal.c b0ea6b70b532961118ab70301c00a33089f9315c
F src/legacy.c a199d7683d60cef73089e892409113e69c23a99f
F src/lempar.c 7f026423f4d71d989e719a743f98a1cbd4e6d99e
F src/loadext.c 1c7a61ce1281041f437333f366a96aa0d29bb581
F src/log.c 6e8f296f6c566a297cd074c4165f1695fd1df5b7
F src/log.h e691f7d935d6a8ad63b9de2e6014627056f01e1a
F src/log.c d89988bb26a3cd414858c97642a612b4ce6e540f
F src/log.h a2654af46ce7b5732f4d5a731abfdd180f0a06d9
F src/main.c c0e7192bad5b90544508b241eb2487ac661de890
F src/malloc.c a08f16d134f0bfab6b20c3cd142ebf3e58235a6a
F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645
@ -157,7 +154,7 @@ F src/os_common.h 240c88b163b02c21a9f21f87d49678a0aa21ff30
F src/os_os2.c 75a8c7b9a00a2cf1a65f9fa4afbc27d46634bb2f
F src/os_unix.c 5bf0015cebe2f21635da2af983c348eb88b3b4c1
F src/os_win.c 1c7453c2df4dab26d90ff6f91272aea18bcf7053
F src/pager.c c4937e7175f0aa66b9122d05cc163c039f854855
F src/pager.c 9e9ee38c923fd225d73127751b7959bd826d0686
F src/pager.h ce5d076f3860a5f2d7460c582cd68383343b33cf
F src/parse.y ace5c7a125d9f2a410e431ee3209034105045f7e
F src/pcache.c ace8f6a5ecd4711cc66a1b23053be7109bd437cf
@ -806,14 +803,7 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P 27dc5977c19e717afd65d3805557e38dec7bedcb
R ca7988a67487ccd526ae5248710a4503
U drh
Z c2421237a2b7de8b8aa2d4d5608541bc
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFLxI4woxKgR168RlERAp8sAJ9x+oJYnqOMgUNsC198WiuQpy6qyQCfcaVa
jjZp98/Y+EUb/i8plNNLbiM=
=2Ctn
-----END PGP SIGNATURE-----
P f4e1150fed2c520c7c52612cb1019429d78dc32a
R da172d13acb33c194abd32bf4ed20cc9
U dan
Z cd10eeca0187a805032040fdd27191f0

View File

@ -1 +1 @@
f4e1150fed2c520c7c52612cb1019429d78dc32a
3f958e87c33d667d299b03ffdef58db5dc6363f4

232
src/log.c
View File

@ -13,6 +13,7 @@
typedef struct LogSummaryHdr LogSummaryHdr;
typedef struct LogSummary LogSummary;
typedef struct LogCheckpoint LogCheckpoint;
typedef struct LogLock LogLock;
/*
@ -55,29 +56,51 @@ struct LogSummary {
int nRef; /* Number of pointers to this structure */
int fd; /* File descriptor open on log-summary */
char *zPath; /* Path to associated WAL file */
LogLock *pLock; /* Linked list of locks on this object */
LogSummary *pNext; /* Next in global list */
int nData; /* Size of aData allocation/mapping */
u32 *aData; /* File body */
};
/*
** List of all LogSummary objects created by this process. Protected by
** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex
** here instead of borrowing the LRU mutex.
** The four lockable regions associated with each log-summary. A connection
** may take either a SHARED or EXCLUSIVE lock on each.
*/
#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU
static LogSummary *pLogSummary = 0;
/* Each region is one bit in a lock mask. logLockRegion() maps regions
** A, B, C and D to the single bytes at offsets 12, 13, 14 and 15 of the
** log-summary file when taking fcntl() locks. */
#define LOG_REGION_A 0x01 /* Locked by readers and checkpointers */
#define LOG_REGION_B 0x02 /* Locked by readers and checkpointers */
#define LOG_REGION_C 0x04 /* Locked by writers and checkpointers */
#define LOG_REGION_D 0x08 /* Locked by readers and writers */
/*
** A single instance of this structure is allocated as part of each
** connection to a database log. All structures associated with the
** same log file are linked together into a list using LogLock.pNext
** starting at LogSummary.pLock.
**
** The mLock field of the structure describes the locks (if any)
** currently held by the connection. If a SHARED lock is held on
** any of the four locking regions, then the associated LOG_REGION_X
** bit (see above) is set. If an EXCLUSIVE lock is held on the region,
** then the (LOG_REGION_X << 8) bit is set.
*/
struct LogLock {
LogLock *pNext; /* Next lock on the same log (list head: LogSummary.pLock) */
u32 mLock; /* LOG_REGION_X bit: SHARED held. (LOG_REGION_X<<8): EXCLUSIVE */
};
struct Log {
LogSummary *pSummary; /* Log file summary data */
sqlite3_vfs *pVfs; /* The VFS used to create pFd */
sqlite3_file *pFd; /* File handle for log file */
int sync_flags; /* Flags to use with OsSync() */
int isLocked; /* True if a snapshot is held open */
int isLocked; /* Non-zero if a snapshot is held open */
int isWriteLocked; /* True if this is the writer connection */
LogSummaryHdr hdr; /* Log summary header for current snapshot */
LogLock lock; /* Lock held by this connection (if any) */
};
/*
** This structure is used to implement an iterator that iterates through
** all frames in the log in database page order. Where two or more frames
@ -102,6 +125,15 @@ struct LogCheckpoint {
} aSegment[1];
};
/*
** List of all LogSummary objects created by this process. Protected by
** static mutex LOG_SUMMARY_MUTEX. TODO: Should have a dedicated mutex
** here instead of borrowing the LRU mutex.
*/
#define LOG_SUMMARY_MUTEX SQLITE_MUTEX_STATIC_LRU
static LogSummary *pLogSummary = 0;
/*
** Generate an 8 byte checksum based on the data in array aByte[] and the
** initial values of aCksum[0] and aCksum[1]. The checksum is written into
@ -664,6 +696,9 @@ int sqlite3LogOpen(
rc = sqlite3OsLock(pRet->pFd, SQLITE_LOCK_SHARED);
}
pRet->lock.pNext = pSummary->pLock;
pSummary->pLock = &pRet->lock;
out:
sqlite3_mutex_leave(mutex);
sqlite3_free(zWal);
@ -838,9 +873,15 @@ int sqlite3LogClose(
){
int rc = SQLITE_OK;
if( pLog ){
LogLock **ppL;
LogSummary *pSummary = pLog->pSummary;
sqlite3_mutex *mutex = 0;
sqlite3_mutex_enter(pSummary->mutex);
for(ppL=&pSummary->pLock; *ppL!=&pLog->lock; ppL=&(*ppL)->pNext);
*ppL = pLog->lock.pNext;
sqlite3_mutex_leave(pSummary->mutex);
if( sqlite3GlobalConfig.bCoreMutex ){
mutex = sqlite3_mutex_alloc(LOG_SUMMARY_MUTEX);
}
@ -939,7 +980,108 @@ static void logLeaveMutex(Log *pLog){
}
/*
** The caller must hold a SHARED lock on the database file.
** Values for the third parameter (op) to logLockRegion().
*/
/* LOG_UNLOCK must remain 0: logLockRegion() tests "if( op )" to decide
** whether a lock is being obtained rather than released. */
#define LOG_UNLOCK 0 /* Clear both the SHARED and EXCLUSIVE bits */
#define LOG_RDLOCK 1 /* Set the SHARED bit, clear the EXCLUSIVE bit */
#define LOG_WRLOCK 2 /* Set both the SHARED and EXCLUSIVE bits */
/*
** Obtain, change or release locks on one or more of the four lockable
** regions of the log-summary on behalf of connection pLog.
**
** Parameter mRegion is a mask of LOG_REGION_X bits and op is one of
** LOG_UNLOCK, LOG_RDLOCK or LOG_WRLOCK. Only the (op, mRegion) pairs
** enumerated in the assert() below are expected.
**
** Locks are tracked at two levels. Within this process, each connection
** records the locks it holds in its LogLock.mLock mask, and a request to
** obtain a lock fails immediately if it conflicts with the mask of any
** other connection in the list at LogSummary.pLock. At the file level,
** fcntl() byte-range locks are taken on the log-summary file descriptor,
** but only when the union of the masks of all connections in the list
** would change as a result of this call.
**
** Return SQLITE_OK on success, in which case pLog->lock.mLock has been
** updated. Return SQLITE_BUSY, leaving pLog->lock.mLock unmodified, if a
** conflicting in-process lock exists or if the fcntl() call fails.
*/
static int logLockRegion(Log *pLog, u32 mRegion, int op){
  LogSummary *pSummary = pLog->pSummary;
  LogLock *pSelf = &pLog->lock;   /* Lock object owned by this connection */
  LogLock *pIter;                 /* Iterator over in-process lock objects */
  u32 mExcl = (mRegion<<8);       /* EXCLUSIVE bits corresponding to mRegion */
  u32 mOthers = 0;                /* Union of locks held by other connections */
  u32 mSelfNew;                   /* Mask pLog will hold if this call succeeds */
  u32 mFileOld;                   /* Union of all masks before this call */
  u32 mFileNew;                   /* Union of all masks after this call */

  assert(
       /* Writer lock operations */
       (op==LOG_WRLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))
    || (op==LOG_UNLOCK && mRegion==(LOG_REGION_C|LOG_REGION_D))

       /* Reader lock operations */
    || (op==LOG_RDLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B))
    || (op==LOG_RDLOCK && mRegion==(LOG_REGION_D))
    || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A))
    || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B))
    || (op==LOG_UNLOCK && mRegion==(LOG_REGION_D))

       /* Checkpointer lock operations */
    || (op==LOG_WRLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
    || (op==LOG_WRLOCK && mRegion==(LOG_REGION_A))
    || (op==LOG_UNLOCK && mRegion==(LOG_REGION_A|LOG_REGION_B|LOG_REGION_C))
    || (op==LOG_UNLOCK && mRegion==(LOG_REGION_B|LOG_REGION_C))
  );

  sqlite3_mutex_enter(pSummary->mutex);

  /* When taking a lock, first look for a conflicting lock held by some
  ** other connection in this process. Any request conflicts with an
  ** EXCLUSIVE lock on the same region; a request for an EXCLUSIVE lock
  ** also conflicts with a SHARED lock.
  */
  if( op!=LOG_UNLOCK ){
    u32 mConflict = mExcl;
    if( op==LOG_WRLOCK ) mConflict |= mRegion;
    for(pIter=pSummary->pLock; pIter; pIter=pIter->pNext){
      if( pIter==pSelf ) continue;
      if( pIter->mLock & mConflict ){
        sqlite3_mutex_leave(pSummary->mutex);
        return SQLITE_BUSY;
      }
    }
  }

  /* Work out the mask this connection will hold once the operation
  ** has been applied. */
  if( op==LOG_UNLOCK ){
    mSelfNew = (pSelf->mLock & ~(mRegion|mExcl));
  }else if( op==LOG_RDLOCK ){
    mSelfNew = ((pSelf->mLock & ~mExcl) | mRegion);
  }else{
    assert( op==LOG_WRLOCK );
    mSelfNew = (pSelf->mLock | mExcl | mRegion);
  }

  /* Compute the union of all masks in the list before and after the
  ** operation. The assert() checks that every EXCLUSIVE bit is
  ** accompanied by the matching SHARED bit. */
  for(pIter=pSummary->pLock; pIter; pIter=pIter->pNext){
    assert( (pIter->mLock & (pIter->mLock<<8))==(pIter->mLock & 0x00000F00) );
    if( pIter!=pSelf ) mOthers |= pIter->mLock;
  }
  mFileOld = mOthers | pSelf->mLock;
  mFileNew = mOthers | mSelfNew;

  if( mFileNew!=mFileOld ){
    static const u32 aRegionBit[4] = {
      LOG_REGION_A, LOG_REGION_B, LOG_REGION_C, LOG_REGION_D
    };
    u32 mDiff = (mFileNew ^ mFileOld);
    u32 mChange = mDiff | (mDiff>>8);   /* Regions whose state changes */
    int iFirst = -1;                    /* Index of lowest changed region */
    int iLast = -1;                     /* Index of highest changed region */
    int i;
    struct flock f;

    memset(&f, 0, sizeof(f));
    if( op==LOG_WRLOCK ){
      f.l_type = F_WRLCK;
    }else if( op==LOG_RDLOCK ){
      f.l_type = F_RDLCK;
    }else{
      f.l_type = F_UNLCK;
    }
    f.l_whence = SEEK_SET;

    /* Regions A..D occupy bytes 12..15 of the file. The locked range runs
    ** from the byte of the lowest changed region up to and including the
    ** byte of the highest changed region. */
    for(i=0; i<4; i++){
      if( mChange & aRegionBit[i] ){
        if( iFirst<0 ) iFirst = i;
        iLast = i;
      }
    }
    if( iFirst>=0 ){
      f.l_start = 12 + iFirst;
      f.l_len = (13 + iLast) - f.l_start;
    }

    if( fcntl(pSummary->fd, F_SETLK, &f)!=0 ){
      sqlite3_mutex_leave(pSummary->mutex);
      return SQLITE_BUSY;
    }
  }

  pSelf->mLock = mSelfNew;
  sqlite3_mutex_leave(pSummary->mutex);
  return SQLITE_OK;
}
/*
** Lock a snapshot.
**
** If this call obtains a new read-lock and the database contents have been
** modified since the most recent call to LogCloseSnapshot() on this Log
@ -950,6 +1092,36 @@ static void logLeaveMutex(Log *pLog){
int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
int rc = SQLITE_OK;
if( pLog->isLocked==0 ){
int nAttempt;
/* Obtain a snapshot-lock on the log-summary file. The procedure
** for obtaining the snapshot lock is:
**
** 1. Attempt a SHARED lock on regions A and B.
** 2a. If step 1 is successful, drop the lock on region B.
** 2b. If step 1 is unsuccessful, attempt a SHARED lock on region D.
** 3. Repeat the above until the lock attempt in step 1 or 2b is
** successful.
**
** If neither of the locks can be obtained after 5 tries, presumably
** something is wrong (i.e. a process not following the locking protocol).
** Return an error code in this case.
*/
rc = SQLITE_BUSY;
for(nAttempt=0; nAttempt<5 && rc==SQLITE_BUSY; nAttempt++){
rc = logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B, LOG_RDLOCK);
if( rc==SQLITE_BUSY ){
rc = logLockRegion(pLog, LOG_REGION_D, LOG_RDLOCK);
if( rc==SQLITE_OK ) pLog->isLocked = LOG_REGION_D;
}else{
logLockRegion(pLog, LOG_REGION_B, LOG_UNLOCK);
pLog->isLocked = LOG_REGION_A;
}
}
if( rc!=SQLITE_OK ){
return rc;
}
if( SQLITE_OK==(rc = logEnterMutex(pLog)) ){
u32 aCksum[2] = {1, 1};
u32 aHdr[LOGSUMMARY_HDR_NFIELD+2];
@ -967,7 +1139,6 @@ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
*pChanged = 1;
}
if( rc==SQLITE_OK ){
pLog->isLocked = 1;
if( memcmp(&pLog->hdr, aHdr, sizeof(LogSummaryHdr)) ){
*pChanged = 1;
memcpy(&pLog->hdr, aHdr, LOGSUMMARY_HDR_NFIELD*sizeof(u32));
@ -975,6 +1146,11 @@ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
}
logLeaveMutex(pLog);
}
if( rc!=SQLITE_OK ){
/* An error occurred while attempting log recovery. */
sqlite3LogCloseSnapshot(pLog);
}
}
return rc;
}
@ -983,6 +1159,10 @@ int sqlite3LogOpenSnapshot(Log *pLog, int *pChanged){
** Unlock the current snapshot.
*/
void sqlite3LogCloseSnapshot(Log *pLog){
  /* Log.isLocked is zero when no snapshot is open. Otherwise it holds the
  ** LOG_REGION_X value of the region on which the snapshot lock was taken. */
  int mHeld = pLog->isLocked;

  if( mHeld!=0 ){
    assert( mHeld==LOG_REGION_A || mHeld==LOG_REGION_D );
    /* The return code is ignored: this function has no way to report it. */
    logLockRegion(pLog, mHeld, LOG_UNLOCK);
  }
  pLog->isLocked = 0;
}
@ -1072,11 +1252,20 @@ void sqlite3LogMaxpgno(Log *pLog, Pgno *pPgno){
int sqlite3LogWriteLock(Log *pLog, int op){
assert( pLog->isLocked );
if( op ){
/* Obtain the writer lock */
int rc = logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_WRLOCK);
if( rc!=SQLITE_OK ){
return rc;
}
if( memcmp(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr)) ){
return SQLITE_BUSY;
}
pLog->isWriteLocked = 1;
}else if( pLog->isWriteLocked ){
logLockRegion(pLog, LOG_REGION_C|LOG_REGION_D, LOG_UNLOCK);
memcpy(&pLog->hdr, pLog->pSummary->aData, sizeof(pLog->hdr));
pLog->isWriteLocked = 0;
}
@ -1226,18 +1415,25 @@ int sqlite3LogFrames(
int sqlite3LogCheckpoint(
Log *pLog, /* Log connection */
sqlite3_file *pFd, /* File descriptor open on db file */
u8 *zBuf /* Temporary buffer to use */
u8 *zBuf, /* Temporary buffer to use */
int (*xBusyHandler)(void *), /* Pointer to busy-handler function */
void *pBusyHandlerArg /* Argument to pass to xBusyHandler */
){
int rc;
/* Assert() that the caller is holding an EXCLUSIVE lock on the
** database file.
*/
#ifdef SQLITE_DEBUG
int lock;
sqlite3OsFileControl(pFd, SQLITE_FCNTL_LOCKSTATE, &lock);
assert( lock>=4 );
#endif
do {
rc = logLockRegion(pLog, LOG_REGION_B|LOG_REGION_C, LOG_WRLOCK);
}while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) );
if( rc!=SQLITE_OK ) return rc;
return logCheckpoint(pLog, pFd, zBuf);
do {
rc = logLockRegion(pLog, LOG_REGION_A, LOG_WRLOCK);
}while( rc==SQLITE_BUSY && xBusyHandler(pBusyHandlerArg) );
if( rc!=SQLITE_OK ) return rc;
rc = logCheckpoint(pLog, pFd, zBuf);
logLockRegion(pLog, LOG_REGION_A|LOG_REGION_B|LOG_REGION_C, LOG_UNLOCK);
return rc;
}

View File

@ -55,7 +55,9 @@ int sqlite3LogFrames(Log *pLog, int, PgHdr *, Pgno, int, int);
int sqlite3LogCheckpoint(
Log *pLog, /* Log connection */
sqlite3_file *pFd, /* File descriptor open on db file */
u8 *zBuf /* Temporary buffer to use */
u8 *zBuf, /* Temporary buffer to use */
int (*xBusyHandler)(void *), /* Pointer to busy-handler function */
void *pBusyHandlerArg /* Argument to pass to xBusyHandler */
);
#endif /* _LOG_H_ */

View File

@ -3120,6 +3120,7 @@ static int pager_write_pagelist(PgHdr *pList){
** EXCLUSIVE, it means the database file has been changed and any rollback
** will require a journal playback.
*/
assert( !pagerUseLog(pList->pPager) );
assert( pPager->state>=PAGER_RESERVED );
rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
@ -3785,21 +3786,13 @@ int sqlite3PagerSharedLock(Pager *pPager){
if( pagerUseLog(pPager) ){
int changed = 0;
/* TODO: Change the following block to grab a WAL read-lock. Or,
** combine obtaining the read-lock with LogOpenSnapshot()? */
rc = pager_wait_on_lock(pPager, SHARED_LOCK);
if( rc!=SQLITE_OK ){
assert( pPager->state==PAGER_UNLOCK );
return pager_error(pPager, rc);
}
rc = sqlite3LogOpenSnapshot(pPager->pLog, &changed);
if( rc==SQLITE_OK ){
if( changed ){
pager_reset(pPager);
assert( pPager->errCode || pPager->dbSizeValid==0 );
}
pPager->state = PAGER_SHARED;
pPager->state = PAGER_SHARED; /* TODO: Is this right? */
rc = sqlite3PagerPagecount(pPager, &changed);
}
}else if( pPager->state==PAGER_UNLOCK || isErrorReset ){
@ -4330,20 +4323,7 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
assert( pPager->pInJournal==0 );
assert( !MEMDB && !pPager->tempFile );
/* Obtain a RESERVED lock on the database file. If the exFlag parameter
** is true, then immediately upgrade this to an EXCLUSIVE lock. The
** busy-handler callback can be used when upgrading to the EXCLUSIVE
** lock, but not when obtaining the RESERVED lock.
*/
rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
if( rc==SQLITE_OK ){
pPager->state = PAGER_RESERVED;
if( exFlag ){
rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
}
}
if( rc==SQLITE_OK && pagerUseLog(pPager) ){
if( pagerUseLog(pPager) ){
/* Grab the write lock on the log file. If successful, upgrade to
** PAGER_EXCLUSIVE state. Otherwise, return an error code to the caller.
** The busy-handler is not invoked if another connection already
@ -4352,6 +4332,20 @@ int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
rc = sqlite3LogWriteLock(pPager->pLog, 1);
if( rc==SQLITE_OK ){
pPager->dbOrigSize = pPager->dbSize;
pPager->state = PAGER_RESERVED;
}
}else{
/* Obtain a RESERVED lock on the database file. If the exFlag parameter
** is true, then immediately upgrade this to an EXCLUSIVE lock. The
** busy-handler callback can be used when upgrading to the EXCLUSIVE
** lock, but not when obtaining the RESERVED lock.
*/
rc = sqlite3OsLock(pPager->fd, RESERVED_LOCK);
if( rc==SQLITE_OK ){
pPager->state = PAGER_RESERVED;
if( exFlag ){
rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
}
}
}
@ -5657,11 +5651,10 @@ sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
int sqlite3PagerCheckpoint(Pager *pPager){
int rc = SQLITE_OK;
if( pPager->pLog ){
rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
if( rc==SQLITE_OK ){
u8 *zBuf = (u8 *)pPager->pTmpSpace;
rc = sqlite3LogCheckpoint(pPager->pLog, pPager->fd, zBuf);
}
u8 *zBuf = (u8 *)pPager->pTmpSpace;
rc = sqlite3LogCheckpoint(pPager->pLog, pPager->fd,
zBuf, pPager->xBusyHandler, pPager->pBusyHandlerArg
);
}
return rc;
}