Experimental change to the xShmXXX parts of the VFS interface.

FossilOrigin-Name: ca68472db01c14a899892007d1cbaff5e86ae193
This commit is contained in:
dan 2010-06-11 19:04:21 +00:00
parent 0b9b4301b8
commit 13a3cb82ce
12 changed files with 428 additions and 395 deletions

View File

@ -1,8 +1,5 @@
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
C Refactor\sand\ssimplify\sthe\slogic\sused\sto\schange\sjournalmode.
D 2010-06-11T17:01:25
C Experimental\schange\sto\sthe\sxShmXXX\sparts\sof\sthe\sVFS\sinterface.
D 2010-06-11T19:04:21
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -153,11 +150,11 @@ F src/mutex_os2.c 6a62583e374ba3ac1a3fcc0da2bfdac7d3942689
F src/mutex_unix.c cf84466b4fdd2baa0d5a10bb19f08b2abc1ce42e
F src/mutex_w32.c 1fe0e735897be20e09dd6f53c3fb516c6b48c0eb
F src/notify.c cbfa66a836da3a51567209636e6a94059c137930
F src/os.c 1516984144e26734f97748f891f1a04f9e294c2e
F src/os.h 6f529984a29511c7a3479cfe549c10bfa131532f
F src/os.c 00ab9dcdee6e33ff3d060744c86af25200c51e0f
F src/os.h a0d2c1436cb6003e6da16001499a0b828f1edb34
F src/os_common.h a8f95b81eca8a1ab8593d23e94f8a35f35d4078f
F src/os_os2.c 665876d5eec7585226b0a1cf5e18098de2b2da19
F src/os_unix.c 12051d37e533cdaa8bb13c9d9fe2a13e08552187
F src/os_unix.c 29dac62790ccea7db1516be3abb007988accb165
F src/os_win.c 0cf1f571546f165001e2391b5d4a4a16d86977d3
F src/pager.c 2964185d4356d0dc159b8340e52d2538d32394e5
F src/pager.h ca1f23c0cf137ac26f8908df2427c8b308361efd
@ -173,7 +170,7 @@ F src/resolve.c ac5f1a713cd1ae77f08b83cc69581e11bf5ae6f9
F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697
F src/select.c c03d8a0565febcde8c6a12c5d77d065fddae889b
F src/shell.c fd4ccdb37c3b68de0623eb938a649e0990710714
F src/sqlite.h.in b6a64327e174cf725e57dd93ddf1e97c52dd41e2
F src/sqlite.h.in 092df034f4b426ffbb9e5bb905958fa35bbb7f7a
F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89
F src/sqliteInt.h 242987ebd2366ea36650a09cdab04a9163c62109
F src/sqliteLimit.h 196e2f83c3b444c4548fc1874f52f84fdbda40f3
@ -195,7 +192,7 @@ F src/test_backup.c c129c91127e9b46e335715ae2e75756e25ba27de
F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2
F src/test_config.c 6210f501d358bde619ae761f06f123529c6ba24f
F src/test_demovfs.c da81a5f7785bb352bda7911c332a983ec4f17f27
F src/test_devsym.c 709712f5157667410cd0dad1b7b1b54319c122c5
F src/test_devsym.c cf64a4b602ccde10c9261283d1b9be12f4c4a0ea
F src/test_func.c 13b582345fb1185a93e46c53310fae8547dcce20
F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2
F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c
@ -212,7 +209,7 @@ F src/test_schema.c 8c06ef9ddb240c7a0fcd31bc221a6a2aade58bf0
F src/test_server.c bbba05c144b5fc4b52ff650a4328027b3fa5fcc6
F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa
F src/test_thread.c aa9919c885a1fe53eafc73492f0898ee6c0a0726
F src/test_vfs.c d329e3ea93624f65d7b6a46209861ddecea4e21d
F src/test_vfs.c b83206d2c04b3ba84d8d85420c4c7573c58feba5
F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9
F src/tokenize.c 25ceb0f0a746ea1d0f9553787f3f0a56853cfaeb
F src/trigger.c 8927588cb9e6d47f933b53bfe74200fbb504100d
@ -229,7 +226,7 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e
F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1
F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2
F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda
F src/wal.c 2cdfea9a5e50e4dde48767e69e1fead2ff1781cd
F src/wal.c 0aa364734d6daca75771944fc2b4a8f36e63fc4e
F src/wal.h 4ace25262452d17e7d3ec970c89ee17794004008
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 1c895bef33d0dfc7ed90fb1f74120435d210ea56
@ -540,7 +537,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806
F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb
F test/pcache.test eebc4420b37cb07733ae9b6e99c9da7c40dd6d58
F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16
F test/permutations.test ad10d7b31b4a585977380886c832e2ac13c41237
F test/permutations.test 64fbafa685149be54a1ceb545942911f998c604d
F test/pragma.test 6960f9efbce476f70ba9ee2171daf5042f9e3d8a
F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47
F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea
@ -768,8 +765,8 @@ F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5
F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8
F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d
F test/wal.test 0a599c3c4812ed92bc7ad9efcc2c4007fe4cc99a
F test/wal2.test 854a2b409450f1cb756c2bbd1e87e30740094357
F test/wal3.test ae876ff988af5b2b34d27474e0dd1a8c84e9bbcb
F test/wal2.test f9dce93acecff697fc1935869b1ae4cb7dc14587
F test/wal3.test 1d3aee1a0295db941a0323c0ce5ac16bd5b7689d
F test/wal_common.tcl 3e953ae60919281688ea73e4d0aa0e1bc94becd9
F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432
F test/walcksum.test 4efa8fb88c32bed8288ea4385a9cc113a5c8f0bf
@ -823,14 +820,10 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P af353bd89e5ec89f113d217225cc59cbc8373d64
R 3e2d7a9cc84ff16ee3e947d8ce602cce
U drh
Z e14f3acbecbf2df0abe6fde12480ec15
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iD8DBQFMEmvooxKgR168RlERAg5RAJ9VmP08NyW1of8QWkDMnAiHK8A/xgCeLBcs
hbn4o1Zj2vKA/g5KFcfNycs=
=Li8t
-----END PGP SIGNATURE-----
P 95cc3f6fdec5494560c3cd4439d06870d1c62506
R 05bb7be31606a33c54980de0a121cf7e
T *branch * experimental
T *sym-experimental *
T -sym-trunk *
U dan
Z 7e046a655e9f9112461a59731e31d3dc

View File

@ -1 +1 @@
95cc3f6fdec5494560c3cd4439d06870d1c62506
ca68472db01c14a899892007d1cbaff5e86ae193

View File

@ -119,6 +119,15 @@ void sqlite3OsShmBarrier(sqlite3_file *id){
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
return id->pMethods->xShmClose(id, deleteFlag);
}
/*
** Convenience wrapper: forward an xShmPage request to the io-methods
** object attached to file handle id and hand back whatever it returns.
*/
int sqlite3OsShmPage(
  sqlite3_file *id,               /* File handle whose xShmPage is invoked */
  int iPage,                      /* Page to retrieve */
  int pgsz,                       /* Size of pages */
  int isWrite,                    /* Passed through to xShmPage unchanged */
  void volatile **pp              /* OUT: set by the xShmPage method */
){
  int rc;
  rc = id->pMethods->xShmPage(id, iPage, pgsz, isWrite, pp);
  return rc;
}
/*
** The next group of routines are convenience wrappers around the

View File

@ -254,6 +254,7 @@ int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int);
int sqlite3OsShmPage(sqlite3_file *,int,int,int,void volatile **);
/*
** Functions for accessing sqlite3_vfs methods

View File

@ -3141,8 +3141,14 @@ struct unixShmNode {
sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */
char *zFilename; /* Name of the mmapped file */
int h; /* Open file descriptor */
int szMap; /* Size of the mapping into memory */
char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */
int pgsz; /* Size of shared-memory pages */
int nPage; /* Size of array apPage */
char **apPage; /* Array of mapped shared-memory pages */
int nRef; /* Number of unixShm objects pointing to this */
unixShm *pFirst; /* All unixShm objects pointing to this */
#ifdef SQLITE_DEBUG
@ -3266,10 +3272,15 @@ static void unixShmPurge(unixFile *pFd){
unixShmNode *p = pFd->pInode->pShmNode;
assert( unixMutexHeld() );
if( p && p->nRef==0 ){
int i;
assert( p->pInode==pFd->pInode );
if( p->mutex ) sqlite3_mutex_free(p->mutex);
if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);
if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap);
for(i=0; i<p->nPage; i++){
munmap(p->apPage[i], p->pgsz);
}
sqlite3_free(p->apPage);
if( p->h>=0 ) close(p->h);
p->pInode->pShmNode = 0;
sqlite3_free(p);
@ -3706,6 +3717,71 @@ static void unixShmBarrier(
unixLeaveMutex();
}
/*
** Obtain a pointer to page iPage of the shared-memory file attached to
** database file handle fd. Pages are pgsz bytes each and are numbered
** from zero.
**
** If page iPage is not yet mapped, every page from the current end of
** the apPage[] array up to and including iPage is mapped. Before mapping,
** the size of the underlying file is checked with fstat(). If the file is
** too small to contain page iPage it is extended with ftruncate() when
** isWrite is true; when isWrite is false nothing is mapped and SQLITE_OK
** is returned.
**
** On return, *pp is set to the mapping of page iPage, or to 0 if that
** page is not mapped. The return value is SQLITE_OK, or
** SQLITE_IOERR_SHMSIZE (fstat/ftruncate failed), SQLITE_IOERR_NOMEM
** (apPage[] could not be enlarged) or SQLITE_IOERR (mmap failed).
*/
static int unixShmPage(
  sqlite3_file *fd,               /* Handle open on database file */
  int iPage,                      /* Page to retrieve */
  int pgsz,                       /* Size of pages */
  int isWrite,                    /* True to extend file if necessary */
  void volatile **pp              /* OUT: Mapped memory */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p = pDbFd->pShm;
  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  assert( p->hasMutexBuf==0 );
  sqlite3_mutex_enter(pShmNode->mutexBuf);
  assert( pgsz==pShmNode->pgsz || pShmNode->nPage==0 );

  if( pShmNode->nPage<=iPage ){
    char **apNew;                      /* New apPage[] array */
    int nByte = (iPage+1)*pgsz;        /* Minimum required file size */
    struct stat sStat;

    pShmNode->pgsz = pgsz;

    /* Make sure the underlying file is large enough (or fail) */
    if( fstat(pShmNode->h, &sStat) ){
      rc = SQLITE_IOERR_SHMSIZE;
      goto shmpage_out;
    }else if( sStat.st_size<nByte ){
      if( !isWrite ) goto shmpage_out;
      if( ftruncate(pShmNode->h, nByte) ){
        rc = SQLITE_IOERR_SHMSIZE;
        goto shmpage_out;
      }
    }

    /* Enlarge the array of page pointers to hold iPage+1 entries */
    apNew = (char**)sqlite3_realloc(pShmNode->apPage, (iPage+1)*sizeof(char *));
    if( !apNew ){
      rc = SQLITE_IOERR_NOMEM;
      goto shmpage_out;
    }
    pShmNode->apPage = apNew;

    /* Map each missing page. The file offset must be that of the page
    ** being stored in apPage[nPage], not that of the requested page
    ** iPage; otherwise every slot filled by this loop would alias the
    ** same region of the file. */
    while( pShmNode->nPage<=iPage ){
      void *pMem = mmap(
          0, pgsz, PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h,
          pShmNode->nPage*pgsz
      );
      if( pMem==MAP_FAILED ){
        /* mmap() can fail for environmental reasons, so report an error
        ** to the caller rather than asserting. */
        rc = SQLITE_IOERR;
        goto shmpage_out;
      }
      pShmNode->apPage[pShmNode->nPage] = pMem;
      pShmNode->nPage++;
    }
  }

shmpage_out:
  if( pShmNode->nPage>iPage ){
    *pp = pShmNode->apPage[iPage];
  }else{
    *pp = 0;
  }
  sqlite3_mutex_leave(pShmNode->mutexBuf);
  return rc;
}
#else
# define unixShmOpen 0
@ -3715,6 +3791,7 @@ static void unixShmBarrier(
# define unixShmLock 0
# define unixShmBarrier 0
# define unixShmClose 0
# define unixShmPage 0
#endif /* #ifndef SQLITE_OMIT_WAL */
/*
@ -3778,7 +3855,8 @@ static const sqlite3_io_methods METHOD = { \
unixShmRelease, /* xShmRelease */ \
unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \
unixShmClose /* xShmClose */ \
unixShmClose, /* xShmClose */ \
unixShmPage /* xShmPage */ \
}; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \

View File

@ -666,6 +666,7 @@ struct sqlite3_io_methods {
int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
void (*xShmBarrier)(sqlite3_file*);
int (*xShmClose)(sqlite3_file*, int deleteFlag);
int (*xShmPage)(sqlite3_file*, int iPage, int pgsz, int, void volatile**);
/* Methods above are valid for version 2 */
/* Additional methods may be added in future releases */
};

View File

@ -57,6 +57,7 @@ static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int,int);
static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int);
static int devsymShmPage(sqlite3_file*,int,int,int, void volatile **);
/*
** Method declarations for devsym_vfs.
@ -125,7 +126,8 @@ static sqlite3_io_methods devsym_io_methods = {
devsymShmRelease, /* xShmRelease */
devsymShmLock, /* xShmLock */
devsymShmBarrier, /* xShmBarrier */
devsymShmClose /* xShmClose */
devsymShmClose, /* xShmClose */
devsymShmPage /* xShmPage */
};
struct DevsymGlobal {
@ -275,6 +277,16 @@ static int devsymShmClose(sqlite3_file *pFile, int delFlag){
devsym_file *p = (devsym_file *)pFile;
return sqlite3OsShmClose(p->pReal, delFlag);
}
/*
** xShmPage method for devsym files: pass the request straight through
** to the underlying "real" file handle and return its result.
*/
static int devsymShmPage(
  sqlite3_file *pFile,            /* devsym file handle */
  int iPage,                      /* Forwarded unchanged */
  int pgsz,                       /* Forwarded unchanged */
  int isWrite,                    /* Forwarded unchanged */
  void volatile **pp              /* OUT: set by the underlying handle */
){
  devsym_file *pDevsym = (devsym_file *)pFile;
  int rc;
  rc = sqlite3OsShmPage(pDevsym->pReal, iPage, pgsz, isWrite, pp);
  return rc;
}

View File

@ -75,10 +75,14 @@ struct Testvfs {
#define TESTVFS_SHMLOCK_MASK 0x00000010
#define TESTVFS_SHMBARRIER_MASK 0x00000020
#define TESTVFS_SHMCLOSE_MASK 0x00000040
#define TESTVFS_SHMPAGE_MASK 0x00000080
#define TESTVFS_OPEN_MASK 0x00000080
#define TESTVFS_SYNC_MASK 0x00000100
#define TESTVFS_ALL_MASK 0x000001FF
#define TESTVFS_OPEN_MASK 0x00000100
#define TESTVFS_SYNC_MASK 0x00000200
#define TESTVFS_ALL_MASK 0x000003FF
#define TESTVFS_MAX_PAGES 256
/*
** A shared-memory buffer. There is one of these objects for each shared
@ -87,8 +91,8 @@ struct Testvfs {
*/
struct TestvfsBuffer {
char *zFile; /* Associated file name */
int n; /* Size of allocated buffer in bytes */
u8 *a; /* Buffer allocated using ckalloc() */
int pgsz; /* Page size */
u8 *aPage[TESTVFS_MAX_PAGES]; /* Array of ckalloc'd pages */
TestvfsFile *pFile; /* List of open handles */
TestvfsBuffer *pNext; /* Next in linked list of all buffers */
};
@ -139,6 +143,7 @@ static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int, int);
static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int);
static int tvfsShmPage(sqlite3_file*,int,int,int, void volatile **);
static sqlite3_io_methods tvfs_io_methods = {
2, /* iVersion */
@ -160,7 +165,8 @@ static sqlite3_io_methods tvfs_io_methods = {
tvfsShmRelease, /* xShmRelease */
tvfsShmLock, /* xShmLock */
tvfsShmBarrier, /* xShmBarrier */
tvfsShmClose /* xShmClose */
tvfsShmClose, /* xShmClose */
tvfsShmPage /* xShmPage */
};
static int tvfsResultCode(Testvfs *p, int *pRc){
@ -547,16 +553,6 @@ static int tvfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){
return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut);
}
static void tvfsGrowBuffer(TestvfsFile *pFd, int reqSize, int *pNewSize){
TestvfsBuffer *pBuffer = pFd->pShm;
if( reqSize>pBuffer->n ){
pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, reqSize);
memset(&pBuffer->a[pBuffer->n], 0x55, reqSize-pBuffer->n);
pBuffer->n = reqSize;
}
*pNewSize = pBuffer->n;
}
static int tvfsInjectIoerr(Testvfs *p){
int ret = 0;
if( p->ioerr ){
@ -624,66 +620,66 @@ static int tvfsShmSize(
int reqSize,
int *pNewSize
){
int rc = SQLITE_OK;
TestvfsFile *pFd = (TestvfsFile *)pFile;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
if( p->pScript && p->mask&TESTVFS_SHMSIZE_MASK ){
tvfsExecTcl(p, "xShmSize",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0
);
tvfsResultCode(p, &rc);
}
if( rc==SQLITE_OK && p->mask&TESTVFS_SHMSIZE_MASK && tvfsInjectIoerr(p) ){
rc = SQLITE_IOERR;
}
if( rc==SQLITE_OK ){
tvfsGrowBuffer(pFd, reqSize, pNewSize);
}
return rc;
assert(0);
return SQLITE_OK;
}
static int tvfsShmGet(
sqlite3_file *pFile,
int reqMapSize,
int *pMapSize,
volatile void **pp
){
assert(0);
return SQLITE_OK;
}
static int tvfsShmRelease(sqlite3_file *pFile){
assert(0);
return SQLITE_OK;
}
/*
** Make sure slot iPage of buffer p holds an allocated page. If the slot
** is currently empty, a page of pgsz bytes is allocated with ckalloc(),
** zero-filled, and p->pgsz is set to pgsz. If the slot is already
** populated this function does nothing.
**
** iPage must be less than TESTVFS_MAX_PAGES.
*/
static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){
  assert( iPage<TESTVFS_MAX_PAGES );
  if( p->aPage[iPage]==0 ){
    /* aPage[] holds u8* pointers; cast the allocator result explicitly
    ** rather than relying on an implicit pointer conversion. */
    p->aPage[iPage] = (u8 *)ckalloc(pgsz);
    memset(p->aPage[iPage], 0, pgsz);
    p->pgsz = pgsz;
  }
}
static int tvfsShmPage(
sqlite3_file *pFile, /* Handle open on database file */
int iPage, /* Page to retrieve */
int pgsz, /* Size of pages */
int isWrite, /* True to extend file if necessary */
void volatile **pp /* OUT: Mapped memory */
){
int rc = SQLITE_OK;
TestvfsFile *pFd = (TestvfsFile *)pFile;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
if( p->pScript && p->mask&TESTVFS_SHMGET_MASK ){
tvfsExecTcl(p, "xShmGet",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId,
Tcl_NewIntObj(reqMapSize)
if( p->pScript && p->mask&TESTVFS_SHMPAGE_MASK ){
Tcl_Obj *pArg = Tcl_NewObj();
Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(iPage));
Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(pgsz));
Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(isWrite));
tvfsExecTcl(p, "xShmPage",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, pArg
);
tvfsResultCode(p, &rc);
}
if( rc==SQLITE_OK && p->mask&TESTVFS_SHMGET_MASK && tvfsInjectIoerr(p) ){
if( rc==SQLITE_OK && p->mask&TESTVFS_SHMPAGE_MASK && tvfsInjectIoerr(p) ){
rc = SQLITE_IOERR;
}
*pMapSize = pFd->pShm->n;
*pp = pFd->pShm->a;
return rc;
}
static int tvfsShmRelease(sqlite3_file *pFile){
int rc = SQLITE_OK;
TestvfsFile *pFd = (TestvfsFile *)pFile;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
if( p->pScript && p->mask&TESTVFS_SHMRELEASE_MASK ){
tvfsExecTcl(p, "xShmRelease",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0
);
tvfsResultCode(p, &rc);
if( rc==SQLITE_OK && isWrite && !pFd->pShm->aPage[iPage] ){
tvfsAllocPage(pFd->pShm, iPage, pgsz);
}
*pp = (void volatile *)pFd->pShm->aPage[iPage];
return rc;
}
static int tvfsShmLock(
sqlite3_file *pFile,
int ofst,
@ -782,10 +778,13 @@ static int tvfsShmClose(
*ppFd = pFd->pNext;
if( pBuffer->pFile==0 ){
int i;
TestvfsBuffer **pp;
for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext));
*pp = (*pp)->pNext;
ckfree((char *)pBuffer->a);
for(i=0; pBuffer->aPage[i]; i++){
ckfree((char *)pBuffer->aPage[i]);
}
ckfree((char *)pBuffer);
}
pFd->pShm = 0;
@ -821,6 +820,8 @@ static int testvfs_obj_cmd(
switch( (enum DB_enum)i ){
case CMD_SHM: {
Tcl_Obj *pObj;
int i;
TestvfsBuffer *pBuffer;
char *zName;
if( objc!=3 && objc!=4 ){
@ -838,11 +839,22 @@ static int testvfs_obj_cmd(
if( objc==4 ){
int n;
u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n);
pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, n);
pBuffer->n = n;
memcpy(pBuffer->a, a, n);
assert( pBuffer->pgsz==0 || pBuffer->pgsz==32768 );
for(i=0; i*32768<n; i++){
int nByte = 32768;
tvfsAllocPage(pBuffer, i, 32768);
if( n-i*32768<32768 ){
nByte = n;
}
memcpy(pBuffer->aPage[i], &a[i*32768], nByte);
}
}
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(pBuffer->a, pBuffer->n));
pObj = Tcl_NewObj();
for(i=0; pBuffer->aPage[i]; i++){
Tcl_AppendObjToObj(pObj, Tcl_NewByteArrayObj(pBuffer->aPage[i], 32768));
}
Tcl_SetObjResult(interp, pObj);
break;
}

472
src/wal.c
View File

@ -370,8 +370,8 @@ struct Wal {
sqlite3_file *pDbFd; /* File handle for the database file */
sqlite3_file *pWalFd; /* File handle for WAL file */
u32 iCallback; /* Value to pass to log callback (or 0) */
int szWIndex; /* Size of the wal-index that is mapped in mem */
volatile u32 *pWiData; /* Pointer to wal-index content in memory */
int nWiData; /* Size of array apWiData */
volatile u32 **apWiData; /* Pointer to wal-index content in memory */
u16 szPage; /* Database page size */
i16 readLock; /* Which read lock is being held. -1 for none */
u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */
@ -386,15 +386,77 @@ struct Wal {
#endif
};
/*
** Define the parameters of the hash tables in the wal-index file. There
** is a hash-table following every HASHTABLE_NPAGE page numbers in the
** wal-index.
**
** Changing any of these constants will alter the wal-index format and
** create incompatibilities.
*/
#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE u16
#define HASHTABLE_HASH_1 383 /* Should be prime */
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
/* The block of page numbers associated with the first hash-table in a
** wal-index is smaller than usual. This is so that there is a complete
** hash-table on each aligned 32KB page of the wal-index.
*/
#define HASHTABLE_NPAGE_ONE (4096 - (WALINDEX_HDR_SIZE/sizeof(u32)))
/* The wal-index is divided into pages of HASHTABLE_PAGESIZE bytes each. */
#define HASHTABLE_PAGESIZE (HASHTABLE_NBYTE + HASHTABLE_NPAGE*sizeof(u32))
/*
** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
** is broken into pages of HASHTABLE_PAGESIZE bytes. Wal-index pages are
** numbered from zero.
**
** If this call is successful, *ppPage is set to point to the wal-index
** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
** then an SQLite error code is returned and *ppPage is set to 0.
*/
static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
  int rc = SQLITE_OK;

  /* Grow pWal->apWiData[] so that it has a slot for iPage. New slots
  ** are zeroed so that they read as "not yet requested". */
  if( iPage>=pWal->nWiData ){
    int nByte = sizeof(u32 *)*(iPage+1);
    volatile u32 **apGrown;
    apGrown = (volatile u32 **)sqlite3_realloc(pWal->apWiData, nByte);
    if( apGrown==0 ){
      *ppPage = 0;
      return SQLITE_NOMEM;
    }
    memset(&apGrown[pWal->nWiData], 0, sizeof(u32 *)*(iPage+1-pWal->nWiData));
    pWal->apWiData = apGrown;
    pWal->nWiData = iPage+1;
  }

  /* Ask the VFS for the page only if no pointer is cached for it yet */
  if( !pWal->apWiData[iPage] ){
    rc = sqlite3OsShmPage(
        pWal->pDbFd, iPage, HASHTABLE_PAGESIZE, pWal->writeLock,
        (void volatile **)&pWal->apWiData[iPage]
    );
  }

  *ppPage = pWal->apWiData[iPage];
  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
  return rc;
}
/*
** Return a pointer to the WalCkptInfo structure in the wal-index.
*/
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
assert( pWal->pWiData!=0 );
return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2];
volatile u32 *page1 = 0;
walIndexPage(pWal, 0, &page1);
assert( page1 );
return (volatile WalCkptInfo*)&page1[sizeof(WalIndexHdr)/2];
}
/*
** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames
@ -413,12 +475,13 @@ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
struct WalIterator {
int iPrior; /* Last result returned from the iterator */
int nSegment; /* Size of the aSegment[] array */
int nFinal; /* Elements in aSegment[nSegment-1] */
struct WalSegment {
int iNext; /* Next slot in aIndex[] not previously returned */
u8 *aIndex; /* i0, i1, i2... such that aPgno[iN] ascending */
u32 *aPgno; /* 256 page numbers. Pointer to Wal.pWiData */
} aSegment[1]; /* One for every 256 entries in the WAL */
int iNext; /* Next slot in aIndex[] not yet returned */
HASHTABLE_DATATYPE *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */
u32 *aPgno; /* Array of page numbers. */
int nEntry; /* Max size of aPgno[] and aIndex[] arrays */
int iZero; /* Frame number associated with aPgno[0] */
} aSegment[1]; /* One for every 32KB page in the WAL */
};
/*
@ -492,7 +555,7 @@ static void walIndexWriteHdr(Wal *pWal){
pWal->hdr.isInit = 1;
walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum),
0, pWal->hdr.aCksum);
aHdr = (WalIndexHdr*)pWal->pWiData;
walIndexPage(pWal, 0, (volatile u32 **)&aHdr);
memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr));
sqlite3OsShmBarrier(pWal->pDbFd);
memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr));
@ -586,19 +649,6 @@ static int walDecodeFrame(
return 1;
}
/*
** Define the parameters of the hash tables in the wal-index file. There
** is a hash-table following every HASHTABLE_NPAGE page numbers in the
** wal-index.
**
** Changing any of these constants will alter the wal-index format and
** create incompatibilities.
*/
#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE u16
#define HASHTABLE_HASH_1 383 /* Should be prime */
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
@ -663,96 +713,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
walLockName(lockIdx), n));
}
/*
** Return the index in the Wal.pWiData array that corresponds to
** frame iFrame.
**
** Wal.pWiData is an array of u32 elements that is the wal-index.
** The array begins with a header and is then followed by alternating
** "map" and "hash-table" blocks. Each "map" block consists of
** HASHTABLE_NPAGE u32 elements which are page numbers corresponding
** to frames in the WAL file.
**
** This routine returns an index X such that Wal.pWiData[X] is part
** of a "map" block that contains the page number of the iFrame-th
** frame in the WAL file.
*/
static int walIndexEntry(u32 iFrame){
return (
(WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
+ (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32)
+ (iFrame-1)
);
}
/*
** Return the minimum size of the shared-memory, in bytes, that is needed
** to support a wal-index containing frame iFrame. The value returned
** includes the wal-index header and the complete "block" containing iFrame,
** including the hash table segment that follows the block.
*/
static int walMappingSize(u32 iFrame){
const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ;
return ( WALINDEX_LOCK_OFFSET
+ WALINDEX_LOCK_RESERVED
+ nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE)
);
}
/*
** Release our reference to the wal-index memory map, if we are holding
** it.
*/
static void walIndexUnmap(Wal *pWal){
if( pWal->pWiData ){
sqlite3OsShmRelease(pWal->pDbFd);
}
pWal->pWiData = 0;
pWal->szWIndex = -1;
}
/*
** Map the wal-index file into memory if it isn't already.
**
** The reqSize parameter is the requested size of the mapping. The
** mapping will be at least this big if the underlying storage is
** that big. But the mapping will never grow larger than the underlying
** storage. Use the walIndexRemap() to enlarget the storage space.
*/
static int walIndexMap(Wal *pWal, int reqSize){
int rc = SQLITE_OK;
if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){
walIndexUnmap(pWal);
rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex,
(void volatile**)(char volatile*)&pWal->pWiData);
if( rc!=SQLITE_OK ){
walIndexUnmap(pWal);
}
}
return rc;
}
/*
** Enlarge the wal-index to be at least enlargeTo bytes in size and
** Remap the wal-index so that the mapping covers the full size
** of the underlying file.
**
** If enlargeTo is non-negative, then increase the size of the underlying
** storage to be at least as big as enlargeTo before remapping.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
int rc;
int sz;
assert( pWal->writeLock );
rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &sz);
if( rc==SQLITE_OK && sz>pWal->szWIndex ){
walIndexUnmap(pWal);
rc = walIndexMap(pWal, sz);
}
assert( pWal->szWIndex>=enlargeTo || rc!=SQLITE_OK );
return rc;
}
/*
** Compute a hash on a page number. The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances
@ -767,6 +727,54 @@ static int walNextHash(int iPriorHash){
return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
}
/*
** Return pointers to the hash table and page-number array stored on
** wal-index page iHash.
**
** *paHash is set to the hash-table slots, which begin HASHTABLE_NPAGE
** u32 entries into the page. *paPgno is set to a biased pointer such
** that (*paPgno)[iFrame] is the page-number entry for WAL frame iFrame,
** for the frames stored on this page. *piZero is set to the number of
** frames indexed by earlier wal-index pages (0 for page 0).
**
** NOTE(review): the return code of walIndexPage() is not checked here;
** callers presumably guarantee that the page can be obtained.
*/
static void walHashGet(
  Wal *pWal,                                /* WAL handle */
  int iHash,                                /* Find the iHash'th table */
  volatile HASHTABLE_DATATYPE **paHash,     /* OUT: Pointer to hash index */
  volatile u32 **paPgno,                    /* OUT: Pointer to page number array */
  u32 *piZero                               /* OUT: Frame associated with *paPgno[0] */
){
  u32 iZero;
  volatile u32 *aPgno;
  volatile HASHTABLE_DATATYPE *aHash;

  walIndexPage(pWal, iHash, &aPgno);
  aHash = (volatile HASHTABLE_DATATYPE *)&aPgno[HASHTABLE_NPAGE];

  if( iHash==0 ){
    /* Page 0 begins with the wal-index header; frame 1 follows it */
    aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)-1];
    iZero = 0;
  }else{
    int iBias;                    /* Signed offset applied to aPgno */
    iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
    /* The bias must be computed as a signed value. Because iZero is a
    ** u32, the expression (-1*iZero-1) is evaluated in unsigned 32-bit
    ** arithmetic and, where pointers are wider than 32 bits, becomes a
    ** large positive subscript instead of a negative one. */
    iBias = -1 - (int)iZero;
    aPgno = &aPgno[iBias];
  }

  *paPgno = aPgno;
  *paHash = aHash;
  *piZero = iZero;
}
/*
** Return the index of the wal-index page that holds the entry for WAL
** frame iFrame. Page 0 covers the first HASHTABLE_NPAGE_ONE frames and
** each later page covers HASHTABLE_NPAGE frames.
*/
static int walFramePage(u32 iFrame){
int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
/* Sanity-check the page boundaries for the first three pages */
assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
&& (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
&& (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
&& (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
&& (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
);
return iHash;
}
/*
** Look up and return the database page number recorded in the wal-index
** for frame iFrame of this WAL. The value is read directly from the
** pWal->apWiData[] entry for the wal-index page that covers iFrame.
*/
static u32 walFramePgno(Wal *pWal, u32 iFrame){
  int iPage = walFramePage(iFrame);
  if( iPage!=0 ){
    /* Later pages: entries start at the beginning of the page */
    return pWal->apWiData[iPage][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
  }
  /* Page 0: entries follow the wal-index header */
  return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
}
/*
** Find the hash table and (section of the) page number array used to
@ -789,27 +797,8 @@ static void walHashFind(
volatile u32 **paPgno, /* OUT: Pointer to page number array */
u32 *piZero /* OUT: Frame associated with *paPgno[0] */
){
u32 iZero;
volatile u32 *aPgno;
volatile HASHTABLE_DATATYPE *aHash;
iZero = ((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NPAGE;
aPgno = &pWal->pWiData[walIndexEntry(iZero+1)-iZero-1];
aHash = (HASHTABLE_DATATYPE *)&aPgno[iZero+HASHTABLE_NPAGE+1];
/* Assert that:
**
** + the mapping is large enough for this hash-table, and
**
** + that aPgno[iZero+1] really is the database page number associated
** with the first frame indexed by this hash table.
*/
assert( (u32*)(&aHash[HASHTABLE_NSLOT])<=&pWal->pWiData[pWal->szWIndex/4] );
assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) );
*paHash = aHash;
*paPgno = aPgno;
*piZero = iZero;
int iHash = walFramePage(iFrame);
walHashGet(pWal, iHash, paHash, paPgno, piZero);
}
/*
@ -829,16 +818,16 @@ static void walCleanupHash(Wal *pWal){
volatile u32 *aPgno; /* Unused return from walHashFind() */
u32 iZero; /* frame == (aHash[x]+iZero) */
int iLimit = 0; /* Zero values greater than this */
int nByte; /* Number of bytes to zero in aPgno[] */
int i; /* Used to iterate through aHash[] */
assert( pWal->writeLock );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 );
if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){
int nByte; /* Number of bytes to zero in aPgno[] */
int i; /* Used to iterate through aHash[] */
walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
if( iZero!=pWal->hdr.mxFrame ){
iLimit = pWal->hdr.mxFrame - iZero;
assert( iLimit>0 );
for(i=0; i<HASHTABLE_NSLOT; i++){
@ -846,13 +835,12 @@ static void walCleanupHash(Wal *pWal){
aHash[i] = 0;
}
}
/* Zero the entries in the aPgno array that correspond to frames with
** frame numbers greater than pWal->hdr.mxFrame.
*/
nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit);
memset((void *)&aPgno[iZero+iLimit+1], 0, nByte);
assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash );
nByte = ((char *)aHash - (char *)&aPgno[pWal->hdr.mxFrame+1]);
memset((void *)&aPgno[pWal->hdr.mxFrame+1], 0, nByte);
}
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
@ -878,15 +866,7 @@ static void walCleanupHash(Wal *pWal){
** pPage into WAL frame iFrame.
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
int rc; /* Return code */
int nMapping; /* Required mapping size in bytes */
/* Make sure the wal-index is mapped. Enlarge the mapping if required. */
nMapping = walMappingSize(iFrame);
rc = walIndexMap(pWal, nMapping);
while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){
rc = walIndexRemap(pWal, nMapping);
}
int rc = SQLITE_OK; /* Return code */
/* Assuming the wal-index file was successfully mapped, find the hash
** table and section of the page number array that pertain to frame
@ -904,8 +884,8 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
idx = iFrame - iZero;
if( idx==1 ){
memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32));
memset((void*)aHash, 0, HASHTABLE_NBYTE);
int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1+iZero];
memset((void*)&aPgno[1+iZero], 0, nByte);
}
assert( idx <= HASHTABLE_NSLOT/2 + 1 );
@ -1076,9 +1056,6 @@ static int walIndexRecover(Wal *pWal){
}
finished:
if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){
rc = walIndexRemap(pWal, walMappingSize(1));
}
if( rc==SQLITE_OK ){
volatile WalCkptInfo *pInfo;
int i;
@ -1164,7 +1141,6 @@ int sqlite3WalOpen(
pRet->pVfs = pVfs;
pRet->pWalFd = (sqlite3_file *)&pRet[1];
pRet->pDbFd = pDbFd;
pRet->szWIndex = -1;
pRet->readLock = -1;
sqlite3_randomness(8, &pRet->hdr.aSalt);
pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
@ -1207,24 +1183,22 @@ static int walIteratorNext(
u32 iMin; /* Result pgno must be greater than iMin */
u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */
int i; /* For looping through segments */
int nBlock = p->nFinal; /* Number of entries in current segment */
iMin = p->iPrior;
assert( iMin<0xffffffff );
for(i=p->nSegment-1; i>=0; i--){
struct WalSegment *pSegment = &p->aSegment[i];
while( pSegment->iNext<nBlock ){
while( pSegment->iNext<pSegment->nEntry ){
u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]];
if( iPg>iMin ){
if( iPg<iRet ){
iRet = iPg;
*piFrame = i*256 + 1 + pSegment->aIndex[pSegment->iNext];
*piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext];
}
break;
}
pSegment->iNext++;
}
nBlock = 256;
}
*piPage = p->iPrior = iRet;
@ -1232,28 +1206,28 @@ static int walIteratorNext(
}
static void walMergesort8(
Pgno *aContent, /* Pages in wal */
u8 *aBuffer, /* Buffer of at least *pnList items to use */
u8 *aList, /* IN/OUT: List to sort */
static void walMergesort(
u32 *aContent, /* Pages in wal */
HASHTABLE_DATATYPE *aBuffer, /* Buffer of at least *pnList items to use */
HASHTABLE_DATATYPE *aList, /* IN/OUT: List to sort */
int *pnList /* IN/OUT: Number of elements in aList[] */
){
int nList = *pnList;
if( nList>1 ){
int nLeft = nList / 2; /* Elements in left list */
int nRight = nList - nLeft; /* Elements in right list */
u8 *aLeft = aList; /* Left list */
u8 *aRight = &aList[nLeft]; /* Right list */
int iLeft = 0; /* Current index in aLeft */
int iRight = 0; /* Current index in aright */
int iOut = 0; /* Current index in output buffer */
HASHTABLE_DATATYPE *aLeft = aList; /* Left list */
HASHTABLE_DATATYPE *aRight = &aList[nLeft]; /* Right list */
/* TODO: Change to non-recursive version. */
walMergesort8(aContent, aBuffer, aLeft, &nLeft);
walMergesort8(aContent, aBuffer, aRight, &nRight);
walMergesort(aContent, aBuffer, aLeft, &nLeft);
walMergesort(aContent, aBuffer, aRight, &nRight);
while( iRight<nRight || iLeft<nLeft ){
u8 logpage;
HASHTABLE_DATATYPE logpage;
Pgno dbpage;
if( (iLeft<nLeft)
@ -1300,60 +1274,68 @@ static void walMergesort8(
** prior to the WalIterator object being destroyed.
*/
static int walIteratorInit(Wal *pWal, WalIterator **pp){
u32 *aData; /* Content of the wal-index file */
WalIterator *p; /* Return value */
int nSegment; /* Number of segments to merge */
u32 iLast; /* Last frame in log */
int nByte; /* Number of bytes to allocate */
int i; /* Iterator variable */
int nFinal; /* Number of unindexed entries */
u8 *aTmp; /* Temp space used by merge-sort */
u8 *aSpace; /* Surplus space on the end of the allocation */
/* Make sure the wal-index is mapped into local memory */
assert( pWal->pWiData && pWal->szWIndex>=walMappingSize(pWal->hdr.mxFrame) );
HASHTABLE_DATATYPE *aTmp; /* Temp space used by merge-sort */
HASHTABLE_DATATYPE *aSpace; /* Space at the end of the allocation */
/* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other
** thread is able to write to shared memory while this routine is
** running (or, indeed, while the WalIterator object exists). Hence,
** we can cast off the volatile qualifacation from shared memory
** we can cast off the volatile qualification from shared memory
*/
assert( pWal->ckptLock );
aData = (u32*)pWal->pWiData;
iLast = pWal->hdr.mxFrame;
/* Allocate space for the WalIterator object */
iLast = pWal->hdr.mxFrame;
nSegment = (iLast >> 8) + 1;
nFinal = (iLast & 0x000000FF);
nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256);
nSegment = walFramePage(iLast) + 1;
nByte = sizeof(WalIterator)
+ nSegment*(sizeof(struct WalSegment))
+ (nSegment+1)*(HASHTABLE_NPAGE * sizeof(HASHTABLE_DATATYPE));
p = (WalIterator *)sqlite3_malloc(nByte);
if( !p ){
return SQLITE_NOMEM;
}
memset(p, 0, nByte);
/* Initialize the WalIterator object. Each 256-entry segment is
** presorted in order to make iterating through all entries much
** faster.
*/
/* Allocate space for the WalIterator object */
p->nSegment = nSegment;
aSpace = (u8 *)&p->aSegment[nSegment];
aTmp = &aSpace[nSegment*256];
aSpace = (HASHTABLE_DATATYPE *)&p->aSegment[nSegment];
aTmp = &aSpace[HASHTABLE_NPAGE*nSegment];
for(i=0; i<nSegment; i++){
volatile HASHTABLE_DATATYPE *pDummy;
int j;
int nIndex = (i==nSegment-1) ? nFinal : 256;
p->aSegment[i].aPgno = &aData[walIndexEntry(i*256+1)];
p->aSegment[i].aIndex = aSpace;
for(j=0; j<nIndex; j++){
u32 iZero;
int nEntry;
volatile u32 *aPgno;
walHashGet(pWal, i, &pDummy, &aPgno, &iZero);
if( i==(nSegment-1) ){
nEntry = iLast - iZero;
}else if( i==0 ){
nEntry = HASHTABLE_NPAGE_ONE;
}else{
nEntry = HASHTABLE_NPAGE;
}
iZero++;
aPgno += iZero;
for(j=0; j<nEntry; j++){
aSpace[j] = j;
}
walMergesort8(p->aSegment[i].aPgno, aTmp, aSpace, &nIndex);
memset(&aSpace[nIndex], aSpace[nIndex-1], 256-nIndex);
aSpace += 256;
p->nFinal = nIndex;
walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry);
p->aSegment[i].iZero = iZero;
p->aSegment[i].nEntry = nEntry;
p->aSegment[i].aIndex = aSpace;
p->aSegment[i].aPgno = (u32 *)aPgno;
aSpace += HASHTABLE_NPAGE;
}
assert( aSpace==aTmp );
/* Return the fully initializd WalIterator object */
/* Return the fully initialized WalIterator object */
*pp = p;
return SQLITE_OK ;
}
@ -1430,8 +1412,8 @@ static int walCheckpoint(
** cannot be backfilled from the WAL.
*/
mxSafeFrame = pWal->hdr.mxFrame;
pHdr = (volatile WalIndexHdr*)pWal->pWiData;
pInfo = (volatile WalCkptInfo*)&pHdr[2];
walIndexPage(pWal, 0, (volatile u32 **)&pHdr);
pInfo = walCkptInfo(pWal);
assert( pInfo==walCkptInfo(pWal) );
for(i=1; i<WAL_NREADER; i++){
u32 y = pInfo->aReadMark[i];
@ -1461,6 +1443,7 @@ static int walCheckpoint(
/* Iterate through the contents of the WAL, copying data to the db file. */
while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
assert( walFramePgno(pWal, iFrame)==iDbpage );
if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue;
rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage,
walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE
@ -1525,7 +1508,6 @@ int sqlite3WalClose(
if( rc==SQLITE_OK ){
isDelete = 1;
}
walIndexUnmap(pWal);
}
walIndexClose(pWal, isDelete);
@ -1534,6 +1516,7 @@ int sqlite3WalClose(
sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
}
WALTRACE(("WAL%p: closed\n", pWal));
sqlite3_free(pWal->apWiData);
sqlite3_free(pWal);
}
return rc;
@ -1560,13 +1543,14 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
u32 aCksum[2]; /* Checksum on the header content */
WalIndexHdr h1, h2; /* Two copies of the header content */
WalIndexHdr *aHdr; /* Header in shared memory */
volatile u32 *page1 = 0;
if( pWal->szWIndex < WALINDEX_HDR_SIZE ){
walIndexPage(pWal, 0, &page1);
if( !page1 ){
/* If the wal-index is not large enough to hold the header, then assume
** the header is invalid. */
return 1;
}
assert( pWal->pWiData );
/* Read the header. This might happen concurrently with a write to the
** same area of shared memory on a different CPU in a SMP,
@ -1578,7 +1562,7 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
** Memory barriers are used to prevent the compiler or the hardware from
** reordering the reads and writes.
*/
aHdr = (WalIndexHdr*)pWal->pWiData;
aHdr = (WalIndexHdr*)page1;
memcpy(&h1, &aHdr[0], sizeof(h1));
sqlite3OsShmBarrier(pWal->pDbFd);
memcpy(&h2, &aHdr[1], sizeof(h2));
@ -1625,9 +1609,10 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
static int walIndexReadHdr(Wal *pWal, int *pChanged){
int rc; /* Return code */
int badHdr; /* True if a header read failed */
volatile u32 *dummy;
assert( pChanged );
rc = walIndexMap(pWal, walMappingSize(1));
rc = walIndexPage(pWal, 0, &dummy);
if( rc!=SQLITE_OK ){
return rc;
}
@ -1659,14 +1644,6 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
}
}
/* Make sure the mapping is large enough to cover the entire wal-index */
if( rc==SQLITE_OK ){
int szWanted = walMappingSize(pWal->hdr.mxFrame);
if( pWal->szWIndex<szWanted ){
rc = walIndexMap(pWal, szWanted);
}
}
return rc;
}
@ -1710,7 +1687,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
u32 mxReadMark; /* Largest aReadMark[] value */
int mxI; /* Index of largest aReadMark[] value */
int i; /* Loop counter */
int rc; /* Return code */
int rc = SQLITE_OK; /* Return code */
assert( pWal->readLock<0 ); /* Not currently locked */
@ -1739,16 +1716,14 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
rc = SQLITE_BUSY_RECOVERY;
}
}
}else{
rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
}
if( rc!=SQLITE_OK ){
return rc;
}
pHdr = (volatile WalIndexHdr*)pWal->pWiData;
pInfo = (volatile WalCkptInfo*)&pHdr[2];
assert( pInfo==walCkptInfo(pWal) );
walIndexPage(pWal, 0, (volatile u32 **)&pHdr);
pInfo = walCkptInfo(pWal);
assert( pInfo==(volatile WalCkptInfo *)&pHdr[2] );
if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
/* The WAL has been completely backfilled (or it is empty)
** and can be safely ignored.
@ -1883,7 +1858,6 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
do{
rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
}while( rc==WAL_RETRY );
walIndexUnmap(pWal);
return rc;
}
@ -1913,7 +1887,6 @@ int sqlite3WalRead(
int nOut, /* Size of buffer pOut in bytes */
u8 *pOut /* Buffer to write page data to */
){
int rc; /* Return code */
u32 iRead = 0; /* If !=0, WAL frame to return data from */
u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */
int iHash; /* Used to loop through N hash tables */
@ -1932,12 +1905,6 @@ int sqlite3WalRead(
return SQLITE_OK;
}
/* Ensure the wal-index is mapped. */
rc = walIndexMap(pWal, walMappingSize(iLast));
if( rc!=SQLITE_OK ){
return rc;
}
/* Search the hash table or tables for an entry matching page number
** pgno. Each iteration of the following for() loop searches one
** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
@ -1963,16 +1930,13 @@ int sqlite3WalRead(
** This condition filters out entries that were added to the hash
** table after the current read-transaction had started.
*/
for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){
for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */
volatile u32 *aPgno; /* Pointer to array of page numbers */
u32 iZero; /* Frame number corresponding to aPgno[0] */
int iKey; /* Hash slot index */
int mxHash; /* upper bound on aHash[] values */
walHashFind(pWal, iHash, &aHash, &aPgno, &iZero);
mxHash = iLast - iZero;
if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE;
walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
u32 iFrame = aHash[iKey] + iZero;
if( iFrame<=iLast && aPgno[iFrame]==pgno ){
@ -1981,7 +1945,6 @@ int sqlite3WalRead(
}
}
}
assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* If expensive assert() statements are available, do a linear search
@ -1991,7 +1954,7 @@ int sqlite3WalRead(
u32 iRead2 = 0;
u32 iTest;
for(iTest=iLast; iTest>0; iTest--){
if( pWal->pWiData[walIndexEntry(iTest)]==pgno ){
if( walFramePgno(pWal, iTest)==pgno ){
iRead2 = iTest;
break;
}
@ -2003,7 +1966,6 @@ int sqlite3WalRead(
/* If iRead is non-zero, then it is the log frame number that contains the
** required page. Read and return data from the log file.
*/
walIndexUnmap(pWal);
if( iRead ){
i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE;
*pInWal = 1;
@ -2039,6 +2001,7 @@ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
int rc;
volatile u32 *page1;
/* Cannot start a write transaction without first holding a read
** transaction. */
@ -2057,19 +2020,13 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){
** time the read transaction on this connection was started, then
** the write is disallowed.
*/
rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
if( rc ){
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
pWal->writeLock = 0;
return rc;
}
if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
walIndexPage(pWal, 0, &page1);
if( memcmp(&pWal->hdr, (void*)page1, sizeof(WalIndexHdr))!=0 ){
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
pWal->writeLock = 0;
rc = SQLITE_BUSY;
}
walIndexUnmap(pWal);
return rc;
}
@ -2102,11 +2059,7 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
Pgno iMax = pWal->hdr.mxFrame;
Pgno iFrame;
assert( pWal->pWiData==0 );
rc = walIndexReadHdr(pWal, &unused);
if( rc==SQLITE_OK ){
rc = walIndexMap(pWal, walMappingSize(iMax));
}
if( rc==SQLITE_OK ){
for(iFrame=pWal->hdr.mxFrame+1;
ALWAYS(rc==SQLITE_OK) && iFrame<=iMax;
@ -2124,12 +2077,11 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
** committed. As a result, the call to xUndo may not fail.
*/
assert( pWal->writeLock );
assert( pWal->pWiData[walIndexEntry(iFrame)]!=1 );
rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]);
assert( walFramePgno(pWal, iFrame)!=1 );
rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
}
walCleanupHash(pWal);
}
walIndexUnmap(pWal);
}
return rc;
}
@ -2170,7 +2122,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
}
if( aWalData[0]<pWal->hdr.mxFrame ){
rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
pWal->hdr.mxFrame = aWalData[0];
pWal->hdr.aFrameCksum[0] = aWalData[1];
pWal->hdr.aFrameCksum[1] = aWalData[2];
@ -2179,7 +2130,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
}
}
walIndexUnmap(pWal);
return rc;
}
@ -2199,9 +2149,7 @@ static int walRestartLog(Wal *pWal){
int rc = SQLITE_OK;
int cnt;
if( pWal->readLock==0
&& SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)))
){
if( pWal->readLock==0 ){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
assert( pInfo->nBackfill==pWal->hdr.mxFrame );
if( pInfo->nBackfill>0 ){
@ -2237,11 +2185,6 @@ static int walRestartLog(Wal *pWal){
int notUsed;
rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
}while( rc==WAL_RETRY );
/* Unmap the wal-index before returning. Otherwise the VFS layer may
** hold a mutex for the duration of the IO performed by WalFrames().
*/
walIndexUnmap(pWal);
}
return rc;
}
@ -2267,7 +2210,6 @@ int sqlite3WalFrames(
assert( pList );
assert( pWal->writeLock );
assert( pWal->pWiData==0 );
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
{ int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
@ -2280,10 +2222,8 @@ int sqlite3WalFrames(
** log file, instead of appending to it at pWal->hdr.mxFrame.
*/
if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
assert( pWal->pWiData==0 );
return rc;
}
assert( pWal->pWiData==0 && pWal->readLock>0 );
/* If this is the first frame written into the log, write the WAL
** header to the start of the WAL file. See comments at the top of
@ -2358,7 +2298,6 @@ int sqlite3WalFrames(
rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
}
assert( pWal->pWiData==0 );
/* Append data to the wal-index. It is not necessary to lock the
** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
@ -2391,7 +2330,6 @@ int sqlite3WalFrames(
}
}
walIndexUnmap(pWal);
WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
return rc;
}
@ -2412,7 +2350,6 @@ int sqlite3WalCheckpoint(
int rc; /* Return code */
int isChanged = 0; /* True if a new wal-index header is loaded */
assert( pWal->pWiData==0 );
assert( pWal->ckptLock==0 );
WALTRACE(("WAL%p: checkpoint begins\n", pWal));
@ -2441,7 +2378,6 @@ int sqlite3WalCheckpoint(
}
/* Release the locks. */
walIndexUnmap(pWal);
walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
pWal->ckptLock = 0;
WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));

View File

@ -13,6 +13,7 @@
set testdir [file dirname $argv0]
source $testdir/tester.tcl
db close
# Argument processing.
#

View File

@ -75,9 +75,14 @@ proc incr_tvfs_hdr {file idx incrval} {
# database content.
#
do_test wal2-1.0 {
proc tvfs_cb {method args} { return SQLITE_OK }
proc tvfs_cb {method filename args} {
set ::filename $filename
return SQLITE_OK
}
testvfs tvfs
tvfs script tvfs_cb
tvfs filter xShmOpen
sqlite3 db test.db -vfs tvfs
sqlite3 db2 test.db -vfs tvfs
@ -123,21 +128,15 @@ foreach {tn iInsert res wal_index_hdr_mod wal_locks} "
do_test wal2-1.$tn.1 {
execsql { INSERT INTO t1 VALUES($iInsert) }
set ::locks [list]
set ::cb_done 0
proc tvfs_cb {method args} {
if {$::cb_done == 0 && $method == "xShmGet"} {
set ::cb_done 1
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1
}
}
if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
lappend ::locks [lindex $args 2]
return SQLITE_OK
}
tvfs filter xShmLock
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1
}
execsql { SELECT count(a), sum(a) FROM t1 } db2
} $res
@ -174,8 +173,9 @@ do_test wal2-2.0 {
testvfs tvfs
tvfs script tvfs_cb
tvfs filter xShmOpen
proc tvfs_cb {method args} {
if {$method == "xShmOpen"} { set ::shm_file [lindex $args 0] }
set ::filename [lindex $args 0]
return SQLITE_OK
}
@ -208,32 +208,28 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} {
8 11 {10 55} {11 66} 6
9 12 {11 66} {12 78} 7
} {
tvfs filter xShmLock
do_test wal2-2.$tn.1 {
set oldhdr [set_tvfs_hdr $::shm_file]
set oldhdr [set_tvfs_hdr $::filename]
execsql { INSERT INTO t1 VALUES($iInsert) }
execsql { SELECT count(a), sum(a) FROM t1 }
} $res1
do_test wal2-2.$tn.2 {
set ::locks [list]
set ::cb_done 0
proc tvfs_cb {method args} {
if {$::cb_done == 0 && $method == "xShmGet"} {
set ::cb_done 1
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1
}
}
if {$method == "xShmLock"} {
set lock [lindex $args 2]
lappend ::locks $lock
if {$lock == $::WRITER} {
set_tvfs_hdr $::shm_file $::oldhdr
}
set lock [lindex $args 2]
lappend ::locks $lock
if {$lock == $::WRITER} {
set_tvfs_hdr $::filename $::oldhdr
}
return SQLITE_OK
}
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1
}
execsql { SELECT count(a), sum(a) FROM t1 } db2
} $res0
@ -243,21 +239,15 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} {
do_test wal2-2.$tn.4 {
set ::locks [list]
set ::cb_done 0
proc tvfs_cb {method args} {
if {$::cb_done == 0 && $method == "xShmGet"} {
set ::cb_done 1
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1
}
}
if {$method == "xShmLock"} {
set lock [lindex $args 2]
lappend ::locks $lock
}
set lock [lindex $args 2]
lappend ::locks $lock
return SQLITE_OK
}
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1
}
execsql { SELECT count(a), sum(a) FROM t1 } db2
} $res1
}

View File

@ -353,7 +353,7 @@ T script method_callback
proc method_callback {method args} {
if {$method == "xShmBarrier"} {
incr ::barrier_count
if {$::barrier_count == 1} {
if {$::barrier_count == 2} {
# This code is executed within the xShmBarrier() callback invoked
# by the client running recovery as part of writing the recovered
# wal-index header. If a second client attempts to access the