Experimental change to the xShmXXX parts of the VFS interface.

FossilOrigin-Name: ca68472db01c14a899892007d1cbaff5e86ae193
This commit is contained in:
dan 2010-06-11 19:04:21 +00:00
parent 0b9b4301b8
commit 13a3cb82ce
12 changed files with 428 additions and 395 deletions

View File

@ -1,8 +1,5 @@
-----BEGIN PGP SIGNED MESSAGE----- C Experimental\schange\sto\sthe\sxShmXXX\sparts\sof\sthe\sVFS\sinterface.
Hash: SHA1 D 2010-06-11T19:04:21
C Refactor\sand\ssimplify\sthe\slogic\sused\sto\schange\sjournalmode.
D 2010-06-11T17:01:25
F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0 F Makefile.arm-wince-mingw32ce-gcc fcd5e9cd67fe88836360bb4f9ef4cb7f8e2fb5a0
F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3 F Makefile.in a5cad1f8f3e021356bfcc6c77dc16f6f1952bbc3
F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654 F Makefile.linux-gcc d53183f4aa6a9192d249731c90dbdffbd2c68654
@ -153,11 +150,11 @@ F src/mutex_os2.c 6a62583e374ba3ac1a3fcc0da2bfdac7d3942689
F src/mutex_unix.c cf84466b4fdd2baa0d5a10bb19f08b2abc1ce42e F src/mutex_unix.c cf84466b4fdd2baa0d5a10bb19f08b2abc1ce42e
F src/mutex_w32.c 1fe0e735897be20e09dd6f53c3fb516c6b48c0eb F src/mutex_w32.c 1fe0e735897be20e09dd6f53c3fb516c6b48c0eb
F src/notify.c cbfa66a836da3a51567209636e6a94059c137930 F src/notify.c cbfa66a836da3a51567209636e6a94059c137930
F src/os.c 1516984144e26734f97748f891f1a04f9e294c2e F src/os.c 00ab9dcdee6e33ff3d060744c86af25200c51e0f
F src/os.h 6f529984a29511c7a3479cfe549c10bfa131532f F src/os.h a0d2c1436cb6003e6da16001499a0b828f1edb34
F src/os_common.h a8f95b81eca8a1ab8593d23e94f8a35f35d4078f F src/os_common.h a8f95b81eca8a1ab8593d23e94f8a35f35d4078f
F src/os_os2.c 665876d5eec7585226b0a1cf5e18098de2b2da19 F src/os_os2.c 665876d5eec7585226b0a1cf5e18098de2b2da19
F src/os_unix.c 12051d37e533cdaa8bb13c9d9fe2a13e08552187 F src/os_unix.c 29dac62790ccea7db1516be3abb007988accb165
F src/os_win.c 0cf1f571546f165001e2391b5d4a4a16d86977d3 F src/os_win.c 0cf1f571546f165001e2391b5d4a4a16d86977d3
F src/pager.c 2964185d4356d0dc159b8340e52d2538d32394e5 F src/pager.c 2964185d4356d0dc159b8340e52d2538d32394e5
F src/pager.h ca1f23c0cf137ac26f8908df2427c8b308361efd F src/pager.h ca1f23c0cf137ac26f8908df2427c8b308361efd
@ -173,7 +170,7 @@ F src/resolve.c ac5f1a713cd1ae77f08b83cc69581e11bf5ae6f9
F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697 F src/rowset.c 69afa95a97c524ba6faf3805e717b5b7ae85a697
F src/select.c c03d8a0565febcde8c6a12c5d77d065fddae889b F src/select.c c03d8a0565febcde8c6a12c5d77d065fddae889b
F src/shell.c fd4ccdb37c3b68de0623eb938a649e0990710714 F src/shell.c fd4ccdb37c3b68de0623eb938a649e0990710714
F src/sqlite.h.in b6a64327e174cf725e57dd93ddf1e97c52dd41e2 F src/sqlite.h.in 092df034f4b426ffbb9e5bb905958fa35bbb7f7a
F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89 F src/sqlite3ext.h 69dfb8116af51b84a029cddb3b35062354270c89
F src/sqliteInt.h 242987ebd2366ea36650a09cdab04a9163c62109 F src/sqliteInt.h 242987ebd2366ea36650a09cdab04a9163c62109
F src/sqliteLimit.h 196e2f83c3b444c4548fc1874f52f84fdbda40f3 F src/sqliteLimit.h 196e2f83c3b444c4548fc1874f52f84fdbda40f3
@ -195,7 +192,7 @@ F src/test_backup.c c129c91127e9b46e335715ae2e75756e25ba27de
F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2 F src/test_btree.c 47cd771250f09cdc6e12dda5bc71bc0b3abc96e2
F src/test_config.c 6210f501d358bde619ae761f06f123529c6ba24f F src/test_config.c 6210f501d358bde619ae761f06f123529c6ba24f
F src/test_demovfs.c da81a5f7785bb352bda7911c332a983ec4f17f27 F src/test_demovfs.c da81a5f7785bb352bda7911c332a983ec4f17f27
F src/test_devsym.c 709712f5157667410cd0dad1b7b1b54319c122c5 F src/test_devsym.c cf64a4b602ccde10c9261283d1b9be12f4c4a0ea
F src/test_func.c 13b582345fb1185a93e46c53310fae8547dcce20 F src/test_func.c 13b582345fb1185a93e46c53310fae8547dcce20
F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2 F src/test_hexio.c 1237f000ec7a491009b1233f5c626ea71bce1ea2
F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c F src/test_init.c 5d624ffd0409d424cf9adbfe1f056b200270077c
@ -212,7 +209,7 @@ F src/test_schema.c 8c06ef9ddb240c7a0fcd31bc221a6a2aade58bf0
F src/test_server.c bbba05c144b5fc4b52ff650a4328027b3fa5fcc6 F src/test_server.c bbba05c144b5fc4b52ff650a4328027b3fa5fcc6
F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa F src/test_tclvar.c f4dc67d5f780707210d6bb0eb6016a431c04c7fa
F src/test_thread.c aa9919c885a1fe53eafc73492f0898ee6c0a0726 F src/test_thread.c aa9919c885a1fe53eafc73492f0898ee6c0a0726
F src/test_vfs.c d329e3ea93624f65d7b6a46209861ddecea4e21d F src/test_vfs.c b83206d2c04b3ba84d8d85420c4c7573c58feba5
F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9 F src/test_wsd.c 41cadfd9d97fe8e3e4e44f61a4a8ccd6f7ca8fe9
F src/tokenize.c 25ceb0f0a746ea1d0f9553787f3f0a56853cfaeb F src/tokenize.c 25ceb0f0a746ea1d0f9553787f3f0a56853cfaeb
F src/trigger.c 8927588cb9e6d47f933b53bfe74200fbb504100d F src/trigger.c 8927588cb9e6d47f933b53bfe74200fbb504100d
@ -229,7 +226,7 @@ F src/vdbeblob.c 5327132a42a91e8b7acfb60b9d2c3b1c5c863e0e
F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1 F src/vdbemem.c 2a82f455f6ca6f78b59fb312f96054c04ae0ead1
F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2 F src/vdbetrace.c 864cef96919323482ebd9986f2132435115e9cc2
F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda F src/vtab.c a0f8a40274e4261696ef57aa806de2776ab72cda
F src/wal.c 2cdfea9a5e50e4dde48767e69e1fead2ff1781cd F src/wal.c 0aa364734d6daca75771944fc2b4a8f36e63fc4e
F src/wal.h 4ace25262452d17e7d3ec970c89ee17794004008 F src/wal.h 4ace25262452d17e7d3ec970c89ee17794004008
F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f F src/walker.c 3112bb3afe1d85dc52317cb1d752055e9a781f8f
F src/where.c 1c895bef33d0dfc7ed90fb1f74120435d210ea56 F src/where.c 1c895bef33d0dfc7ed90fb1f74120435d210ea56
@ -540,7 +537,7 @@ F test/pageropt.test 8146bf448cf09e87bb1867c2217b921fb5857806
F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb F test/pagesize.test 76aa9f23ecb0741a4ed9d2e16c5fa82671f28efb
F test/pcache.test eebc4420b37cb07733ae9b6e99c9da7c40dd6d58 F test/pcache.test eebc4420b37cb07733ae9b6e99c9da7c40dd6d58
F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16 F test/pcache2.test 0d85f2ab6963aee28c671d4c71bec038c00a1d16
F test/permutations.test ad10d7b31b4a585977380886c832e2ac13c41237 F test/permutations.test 64fbafa685149be54a1ceb545942911f998c604d
F test/pragma.test 6960f9efbce476f70ba9ee2171daf5042f9e3d8a F test/pragma.test 6960f9efbce476f70ba9ee2171daf5042f9e3d8a
F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47 F test/pragma2.test 5364893491b9231dd170e3459bfc2e2342658b47
F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea F test/printf.test 05970cde31b1a9f54bd75af60597be75a5c54fea
@ -768,8 +765,8 @@ F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5
F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8
F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d F test/vtab_shared.test 0eff9ce4f19facbe0a3e693f6c14b80711a4222d
F test/wal.test 0a599c3c4812ed92bc7ad9efcc2c4007fe4cc99a F test/wal.test 0a599c3c4812ed92bc7ad9efcc2c4007fe4cc99a
F test/wal2.test 854a2b409450f1cb756c2bbd1e87e30740094357 F test/wal2.test f9dce93acecff697fc1935869b1ae4cb7dc14587
F test/wal3.test ae876ff988af5b2b34d27474e0dd1a8c84e9bbcb F test/wal3.test 1d3aee1a0295db941a0323c0ce5ac16bd5b7689d
F test/wal_common.tcl 3e953ae60919281688ea73e4d0aa0e1bc94becd9 F test/wal_common.tcl 3e953ae60919281688ea73e4d0aa0e1bc94becd9
F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432 F test/walbak.test e7650a26eb4b8abeca9b145b1af1e63026dde432
F test/walcksum.test 4efa8fb88c32bed8288ea4385a9cc113a5c8f0bf F test/walcksum.test 4efa8fb88c32bed8288ea4385a9cc113a5c8f0bf
@ -823,14 +820,10 @@ F tool/speedtest2.tcl ee2149167303ba8e95af97873c575c3e0fab58ff
F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224 F tool/speedtest8.c 2902c46588c40b55661e471d7a86e4dd71a18224
F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e F tool/speedtest8inst1.c 293327bc76823f473684d589a8160bde1f52c14e
F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f F tool/vdbe-compress.tcl d70ea6d8a19e3571d7ab8c9b75cba86d1173ff0f
P af353bd89e5ec89f113d217225cc59cbc8373d64 P 95cc3f6fdec5494560c3cd4439d06870d1c62506
R 3e2d7a9cc84ff16ee3e947d8ce602cce R 05bb7be31606a33c54980de0a121cf7e
U drh T *branch * experimental
Z e14f3acbecbf2df0abe6fde12480ec15 T *sym-experimental *
-----BEGIN PGP SIGNATURE----- T -sym-trunk *
Version: GnuPG v1.4.6 (GNU/Linux) U dan
Z 7e046a655e9f9112461a59731e31d3dc
iD8DBQFMEmvooxKgR168RlERAg5RAJ9VmP08NyW1of8QWkDMnAiHK8A/xgCeLBcs
hbn4o1Zj2vKA/g5KFcfNycs=
=Li8t
-----END PGP SIGNATURE-----

View File

@ -1 +1 @@
95cc3f6fdec5494560c3cd4439d06870d1c62506 ca68472db01c14a899892007d1cbaff5e86ae193

View File

@ -119,6 +119,15 @@ void sqlite3OsShmBarrier(sqlite3_file *id){
int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){ int sqlite3OsShmClose(sqlite3_file *id, int deleteFlag){
return id->pMethods->xShmClose(id, deleteFlag); return id->pMethods->xShmClose(id, deleteFlag);
} }
/*
** Forward a shared-memory page request to the xShmPage method of the
** sqlite3_io_methods object attached to file handle id. Every argument
** is passed through untouched and the method's return code is returned
** to the caller as-is.
*/
int sqlite3OsShmPage(
  sqlite3_file *id,
  int iPage,
  int pgsz,
  int isWrite,
  void volatile **pp
){
  int rc;
  rc = id->pMethods->xShmPage(id, iPage, pgsz, isWrite, pp);
  return rc;
}
/* /*
** The next group of routines are convenience wrappers around the ** The next group of routines are convenience wrappers around the

View File

@ -254,6 +254,7 @@ int sqlite3OsShmRelease(sqlite3_file *id);
int sqlite3OsShmLock(sqlite3_file *id, int, int, int); int sqlite3OsShmLock(sqlite3_file *id, int, int, int);
void sqlite3OsShmBarrier(sqlite3_file *id); void sqlite3OsShmBarrier(sqlite3_file *id);
int sqlite3OsShmClose(sqlite3_file *id, int); int sqlite3OsShmClose(sqlite3_file *id, int);
int sqlite3OsShmPage(sqlite3_file *,int,int,int,void volatile **);
/* /*
** Functions for accessing sqlite3_vfs methods ** Functions for accessing sqlite3_vfs methods

View File

@ -3141,8 +3141,14 @@ struct unixShmNode {
sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */ sqlite3_mutex *mutexBuf; /* Mutex to access zBuf[] */
char *zFilename; /* Name of the mmapped file */ char *zFilename; /* Name of the mmapped file */
int h; /* Open file descriptor */ int h; /* Open file descriptor */
int szMap; /* Size of the mapping into memory */ int szMap; /* Size of the mapping into memory */
char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */ char *pMMapBuf; /* Where currently mmapped(). NULL if unmapped */
int pgsz; /* Size of shared-memory pages */
int nPage; /* Size of array apPage */
char **apPage; /* Array of mapped shared-memory pages */
int nRef; /* Number of unixShm objects pointing to this */ int nRef; /* Number of unixShm objects pointing to this */
unixShm *pFirst; /* All unixShm objects pointing to this */ unixShm *pFirst; /* All unixShm objects pointing to this */
#ifdef SQLITE_DEBUG #ifdef SQLITE_DEBUG
@ -3266,10 +3272,15 @@ static void unixShmPurge(unixFile *pFd){
unixShmNode *p = pFd->pInode->pShmNode; unixShmNode *p = pFd->pInode->pShmNode;
assert( unixMutexHeld() ); assert( unixMutexHeld() );
if( p && p->nRef==0 ){ if( p && p->nRef==0 ){
int i;
assert( p->pInode==pFd->pInode ); assert( p->pInode==pFd->pInode );
if( p->mutex ) sqlite3_mutex_free(p->mutex); if( p->mutex ) sqlite3_mutex_free(p->mutex);
if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf); if( p->mutexBuf ) sqlite3_mutex_free(p->mutexBuf);
if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap); if( p->pMMapBuf ) munmap(p->pMMapBuf, p->szMap);
for(i=0; i<p->nPage; i++){
munmap(p->apPage[i], p->pgsz);
}
sqlite3_free(p->apPage);
if( p->h>=0 ) close(p->h); if( p->h>=0 ) close(p->h);
p->pInode->pShmNode = 0; p->pInode->pShmNode = 0;
sqlite3_free(p); sqlite3_free(p);
@ -3706,6 +3717,71 @@ static void unixShmBarrier(
unixLeaveMutex(); unixLeaveMutex();
} }
/*
** Obtain a pointer to page iPage of the shared-memory file that backs
** the connection open on fd. Pages are pgsz bytes each and are mapped
** individually; the resulting pointers are cached in pShmNode->apPage[].
**
** If the page is not yet mapped, the underlying file is checked with
** fstat(). When the file is too small to contain page iPage:
**
**   * if isWrite is zero, nothing is mapped, *pp is set to 0 and
**     SQLITE_OK is returned;
**   * otherwise the file is extended with ftruncate() first.
**
** On success *pp points at the mapped page. On failure *pp is set to 0
** (unless the page had already been mapped) and one of
** SQLITE_IOERR_SHMSIZE, SQLITE_IOERR_NOMEM or SQLITE_IOERR is returned.
**
** pShmNode->mutexBuf is held for the duration of the call.
*/
static int unixShmPage(
  sqlite3_file *fd,               /* Handle open on database file */
  int iPage,                      /* Page to retrieve */
  int pgsz,                       /* Size of pages */
  int isWrite,                    /* True to extend file if necessary */
  void volatile **pp              /* OUT: Mapped memory */
){
  unixFile *pDbFd = (unixFile*)fd;
  unixShm *p = pDbFd->pShm;
  unixShmNode *pShmNode = p->pShmNode;
  int rc = SQLITE_OK;

  assert( p->hasMutexBuf==0 );
  sqlite3_mutex_enter(pShmNode->mutexBuf);
  assert( pgsz==pShmNode->pgsz || pShmNode->nPage==0 );

  if( pShmNode->nPage<=iPage ){
    char **apNew;                      /* New apPage[] array */
    int nByte = (iPage+1)*pgsz;        /* Minimum required file size */
    struct stat sStat;                 /* Used by fstat() */

    pShmNode->pgsz = pgsz;

    /* Make sure the underlying file is large enough (or fail) */
    if( fstat(pShmNode->h, &sStat) ){
      rc = SQLITE_IOERR_SHMSIZE;
      goto shmpage_out;
    }else if( sStat.st_size<nByte ){
      /* A reader never grows the file: report "no such page" via *pp==0. */
      if( !isWrite ) goto shmpage_out;
      if( ftruncate(pShmNode->h, nByte) ){
        rc = SQLITE_IOERR_SHMSIZE;
        goto shmpage_out;
      }
    }

    /* Grow the array of page pointers so that slot iPage exists. */
    apNew = (char**)sqlite3_realloc(pShmNode->apPage, (iPage+1)*sizeof(char *));
    if( !apNew ){
      rc = SQLITE_IOERR_NOMEM;
      goto shmpage_out;
    }
    pShmNode->apPage = apNew;

    /* Map every page from the first unmapped one up to and including
    ** iPage. Each slot must map its OWN region of the file, so the
    ** offset is derived from the slot being filled (nPage), not from
    ** the page that was requested (iPage). */
    while( pShmNode->nPage<=iPage ){
      void *pMem = mmap(
          0, pgsz, PROT_READ|PROT_WRITE, MAP_SHARED, pShmNode->h,
          pShmNode->nPage*pgsz
      );
      if( pMem==MAP_FAILED ){
        /* mmap() can legitimately fail at runtime (e.g. out of address
        ** space); report it to the caller rather than asserting. */
        rc = SQLITE_IOERR;
        goto shmpage_out;
      }
      pShmNode->apPage[pShmNode->nPage] = (char *)pMem;
      pShmNode->nPage++;
    }
  }

shmpage_out:
  if( pShmNode->nPage>iPage ){
    *pp = pShmNode->apPage[iPage];
  }else{
    *pp = 0;
  }
  sqlite3_mutex_leave(pShmNode->mutexBuf);
  return rc;
}
#else #else
# define unixShmOpen 0 # define unixShmOpen 0
@ -3715,6 +3791,7 @@ static void unixShmBarrier(
# define unixShmLock 0 # define unixShmLock 0
# define unixShmBarrier 0 # define unixShmBarrier 0
# define unixShmClose 0 # define unixShmClose 0
# define unixShmPage 0
#endif /* #ifndef SQLITE_OMIT_WAL */ #endif /* #ifndef SQLITE_OMIT_WAL */
/* /*
@ -3778,7 +3855,8 @@ static const sqlite3_io_methods METHOD = { \
unixShmRelease, /* xShmRelease */ \ unixShmRelease, /* xShmRelease */ \
unixShmLock, /* xShmLock */ \ unixShmLock, /* xShmLock */ \
unixShmBarrier, /* xShmBarrier */ \ unixShmBarrier, /* xShmBarrier */ \
unixShmClose /* xShmClose */ \ unixShmClose, /* xShmClose */ \
unixShmPage /* xShmPage */ \
}; \ }; \
static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \ static const sqlite3_io_methods *FINDER##Impl(const char *z, unixFile *p){ \
UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \ UNUSED_PARAMETER(z); UNUSED_PARAMETER(p); \

View File

@ -666,6 +666,7 @@ struct sqlite3_io_methods {
int (*xShmLock)(sqlite3_file*, int offset, int n, int flags); int (*xShmLock)(sqlite3_file*, int offset, int n, int flags);
void (*xShmBarrier)(sqlite3_file*); void (*xShmBarrier)(sqlite3_file*);
int (*xShmClose)(sqlite3_file*, int deleteFlag); int (*xShmClose)(sqlite3_file*, int deleteFlag);
int (*xShmPage)(sqlite3_file*, int iPage, int pgsz, int, void volatile**);
/* Methods above are valid for version 2 */ /* Methods above are valid for version 2 */
/* Additional methods may be added in future releases */ /* Additional methods may be added in future releases */
}; };

View File

@ -57,6 +57,7 @@ static int devsymShmRelease(sqlite3_file*);
static int devsymShmLock(sqlite3_file*,int,int,int); static int devsymShmLock(sqlite3_file*,int,int,int);
static void devsymShmBarrier(sqlite3_file*); static void devsymShmBarrier(sqlite3_file*);
static int devsymShmClose(sqlite3_file*,int); static int devsymShmClose(sqlite3_file*,int);
static int devsymShmPage(sqlite3_file*,int,int,int, void volatile **);
/* /*
** Method declarations for devsym_vfs. ** Method declarations for devsym_vfs.
@ -125,7 +126,8 @@ static sqlite3_io_methods devsym_io_methods = {
devsymShmRelease, /* xShmRelease */ devsymShmRelease, /* xShmRelease */
devsymShmLock, /* xShmLock */ devsymShmLock, /* xShmLock */
devsymShmBarrier, /* xShmBarrier */ devsymShmBarrier, /* xShmBarrier */
devsymShmClose /* xShmClose */ devsymShmClose, /* xShmClose */
devsymShmPage /* xShmPage */
}; };
struct DevsymGlobal { struct DevsymGlobal {
@ -275,6 +277,16 @@ static int devsymShmClose(sqlite3_file *pFile, int delFlag){
devsym_file *p = (devsym_file *)pFile; devsym_file *p = (devsym_file *)pFile;
return sqlite3OsShmClose(p->pReal, delFlag); return sqlite3OsShmClose(p->pReal, delFlag);
} }
/*
** xShmPage method for the devsym VFS wrapper: hand the request straight
** to the wrapped ("real") file handle and return whatever it returns.
*/
static int devsymShmPage(
  sqlite3_file *pFile,
  int iPage,
  int pgsz,
  int isWrite,
  void volatile **pp
){
  devsym_file *pDevsym = (devsym_file *)pFile;
  int rc;
  rc = sqlite3OsShmPage(pDevsym->pReal, iPage, pgsz, isWrite, pp);
  return rc;
}

View File

@ -75,10 +75,14 @@ struct Testvfs {
#define TESTVFS_SHMLOCK_MASK 0x00000010 #define TESTVFS_SHMLOCK_MASK 0x00000010
#define TESTVFS_SHMBARRIER_MASK 0x00000020 #define TESTVFS_SHMBARRIER_MASK 0x00000020
#define TESTVFS_SHMCLOSE_MASK 0x00000040 #define TESTVFS_SHMCLOSE_MASK 0x00000040
#define TESTVFS_SHMPAGE_MASK 0x00000080
#define TESTVFS_OPEN_MASK 0x00000080 #define TESTVFS_OPEN_MASK 0x00000100
#define TESTVFS_SYNC_MASK 0x00000100 #define TESTVFS_SYNC_MASK 0x00000200
#define TESTVFS_ALL_MASK 0x000001FF #define TESTVFS_ALL_MASK 0x000003FF
#define TESTVFS_MAX_PAGES 256
/* /*
** A shared-memory buffer. There is one of these objects for each shared ** A shared-memory buffer. There is one of these objects for each shared
@ -87,8 +91,8 @@ struct Testvfs {
*/ */
struct TestvfsBuffer { struct TestvfsBuffer {
char *zFile; /* Associated file name */ char *zFile; /* Associated file name */
int n; /* Size of allocated buffer in bytes */ int pgsz; /* Page size */
u8 *a; /* Buffer allocated using ckalloc() */ u8 *aPage[TESTVFS_MAX_PAGES]; /* Array of ckalloc'd pages */
TestvfsFile *pFile; /* List of open handles */ TestvfsFile *pFile; /* List of open handles */
TestvfsBuffer *pNext; /* Next in linked list of all buffers */ TestvfsBuffer *pNext; /* Next in linked list of all buffers */
}; };
@ -139,6 +143,7 @@ static int tvfsShmRelease(sqlite3_file*);
static int tvfsShmLock(sqlite3_file*, int , int, int); static int tvfsShmLock(sqlite3_file*, int , int, int);
static void tvfsShmBarrier(sqlite3_file*); static void tvfsShmBarrier(sqlite3_file*);
static int tvfsShmClose(sqlite3_file*, int); static int tvfsShmClose(sqlite3_file*, int);
static int tvfsShmPage(sqlite3_file*,int,int,int, void volatile **);
static sqlite3_io_methods tvfs_io_methods = { static sqlite3_io_methods tvfs_io_methods = {
2, /* iVersion */ 2, /* iVersion */
@ -160,7 +165,8 @@ static sqlite3_io_methods tvfs_io_methods = {
tvfsShmRelease, /* xShmRelease */ tvfsShmRelease, /* xShmRelease */
tvfsShmLock, /* xShmLock */ tvfsShmLock, /* xShmLock */
tvfsShmBarrier, /* xShmBarrier */ tvfsShmBarrier, /* xShmBarrier */
tvfsShmClose /* xShmClose */ tvfsShmClose, /* xShmClose */
tvfsShmPage /* xShmPage */
}; };
static int tvfsResultCode(Testvfs *p, int *pRc){ static int tvfsResultCode(Testvfs *p, int *pRc){
@ -547,16 +553,6 @@ static int tvfsCurrentTime(sqlite3_vfs *pVfs, double *pTimeOut){
return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut); return PARENTVFS(pVfs)->xCurrentTime(PARENTVFS(pVfs), pTimeOut);
} }
static void tvfsGrowBuffer(TestvfsFile *pFd, int reqSize, int *pNewSize){
TestvfsBuffer *pBuffer = pFd->pShm;
if( reqSize>pBuffer->n ){
pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, reqSize);
memset(&pBuffer->a[pBuffer->n], 0x55, reqSize-pBuffer->n);
pBuffer->n = reqSize;
}
*pNewSize = pBuffer->n;
}
static int tvfsInjectIoerr(Testvfs *p){ static int tvfsInjectIoerr(Testvfs *p){
int ret = 0; int ret = 0;
if( p->ioerr ){ if( p->ioerr ){
@ -624,66 +620,66 @@ static int tvfsShmSize(
int reqSize, int reqSize,
int *pNewSize int *pNewSize
){ ){
int rc = SQLITE_OK; assert(0);
TestvfsFile *pFd = (TestvfsFile *)pFile; return SQLITE_OK;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
if( p->pScript && p->mask&TESTVFS_SHMSIZE_MASK ){
tvfsExecTcl(p, "xShmSize",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0
);
tvfsResultCode(p, &rc);
}
if( rc==SQLITE_OK && p->mask&TESTVFS_SHMSIZE_MASK && tvfsInjectIoerr(p) ){
rc = SQLITE_IOERR;
}
if( rc==SQLITE_OK ){
tvfsGrowBuffer(pFd, reqSize, pNewSize);
}
return rc;
} }
static int tvfsShmGet( static int tvfsShmGet(
sqlite3_file *pFile, sqlite3_file *pFile,
int reqMapSize, int reqMapSize,
int *pMapSize, int *pMapSize,
volatile void **pp volatile void **pp
){
assert(0);
return SQLITE_OK;
}
static int tvfsShmRelease(sqlite3_file *pFile){
assert(0);
return SQLITE_OK;
}
/*
** Make sure slot iPage of buffer p holds an allocated page. If the slot
** is still empty, allocate pgsz bytes with ckalloc(), zero them, and
** record pgsz as the buffer's page size. If the slot is already
** populated this is a no-op.
*/
static void tvfsAllocPage(TestvfsBuffer *p, int iPage, int pgsz){
  assert( iPage<TESTVFS_MAX_PAGES );
  if( p->aPage[iPage] ) return;        /* Already allocated: nothing to do */
  p->aPage[iPage] = ckalloc(pgsz);
  memset(p->aPage[iPage], 0, pgsz);
  p->pgsz = pgsz;
}
static int tvfsShmPage(
sqlite3_file *pFile, /* Handle open on database file */
int iPage, /* Page to retrieve */
int pgsz, /* Size of pages */
int isWrite, /* True to extend file if necessary */
void volatile **pp /* OUT: Mapped memory */
){ ){
int rc = SQLITE_OK; int rc = SQLITE_OK;
TestvfsFile *pFd = (TestvfsFile *)pFile; TestvfsFile *pFd = (TestvfsFile *)pFile;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData); Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
if( p->pScript && p->mask&TESTVFS_SHMGET_MASK ){ if( p->pScript && p->mask&TESTVFS_SHMPAGE_MASK ){
tvfsExecTcl(p, "xShmGet", Tcl_Obj *pArg = Tcl_NewObj();
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(iPage));
Tcl_NewIntObj(reqMapSize) Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(pgsz));
Tcl_ListObjAppendElement(p->interp, pArg, Tcl_NewIntObj(isWrite));
tvfsExecTcl(p, "xShmPage",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, pArg
); );
tvfsResultCode(p, &rc); tvfsResultCode(p, &rc);
} }
if( rc==SQLITE_OK && p->mask&TESTVFS_SHMGET_MASK && tvfsInjectIoerr(p) ){ if( rc==SQLITE_OK && p->mask&TESTVFS_SHMPAGE_MASK && tvfsInjectIoerr(p) ){
rc = SQLITE_IOERR; rc = SQLITE_IOERR;
} }
*pMapSize = pFd->pShm->n; if( rc==SQLITE_OK && isWrite && !pFd->pShm->aPage[iPage] ){
*pp = pFd->pShm->a; tvfsAllocPage(pFd->pShm, iPage, pgsz);
return rc;
}
static int tvfsShmRelease(sqlite3_file *pFile){
int rc = SQLITE_OK;
TestvfsFile *pFd = (TestvfsFile *)pFile;
Testvfs *p = (Testvfs *)(pFd->pVfs->pAppData);
if( p->pScript && p->mask&TESTVFS_SHMRELEASE_MASK ){
tvfsExecTcl(p, "xShmRelease",
Tcl_NewStringObj(pFd->pShm->zFile, -1), pFd->pShmId, 0
);
tvfsResultCode(p, &rc);
} }
*pp = (void volatile *)pFd->pShm->aPage[iPage];
return rc; return rc;
} }
static int tvfsShmLock( static int tvfsShmLock(
sqlite3_file *pFile, sqlite3_file *pFile,
int ofst, int ofst,
@ -782,10 +778,13 @@ static int tvfsShmClose(
*ppFd = pFd->pNext; *ppFd = pFd->pNext;
if( pBuffer->pFile==0 ){ if( pBuffer->pFile==0 ){
int i;
TestvfsBuffer **pp; TestvfsBuffer **pp;
for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext)); for(pp=&p->pBuffer; *pp!=pBuffer; pp=&((*pp)->pNext));
*pp = (*pp)->pNext; *pp = (*pp)->pNext;
ckfree((char *)pBuffer->a); for(i=0; pBuffer->aPage[i]; i++){
ckfree((char *)pBuffer->aPage[i]);
}
ckfree((char *)pBuffer); ckfree((char *)pBuffer);
} }
pFd->pShm = 0; pFd->pShm = 0;
@ -821,6 +820,8 @@ static int testvfs_obj_cmd(
switch( (enum DB_enum)i ){ switch( (enum DB_enum)i ){
case CMD_SHM: { case CMD_SHM: {
Tcl_Obj *pObj;
int i;
TestvfsBuffer *pBuffer; TestvfsBuffer *pBuffer;
char *zName; char *zName;
if( objc!=3 && objc!=4 ){ if( objc!=3 && objc!=4 ){
@ -838,11 +839,22 @@ static int testvfs_obj_cmd(
if( objc==4 ){ if( objc==4 ){
int n; int n;
u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n); u8 *a = Tcl_GetByteArrayFromObj(objv[3], &n);
pBuffer->a = (u8 *)ckrealloc((char *)pBuffer->a, n); assert( pBuffer->pgsz==0 || pBuffer->pgsz==32768 );
pBuffer->n = n; for(i=0; i*32768<n; i++){
memcpy(pBuffer->a, a, n); int nByte = 32768;
tvfsAllocPage(pBuffer, i, 32768);
if( n-i*32768<32768 ){
nByte = n;
} }
Tcl_SetObjResult(interp, Tcl_NewByteArrayObj(pBuffer->a, pBuffer->n)); memcpy(pBuffer->aPage[i], &a[i*32768], nByte);
}
}
pObj = Tcl_NewObj();
for(i=0; pBuffer->aPage[i]; i++){
Tcl_AppendObjToObj(pObj, Tcl_NewByteArrayObj(pBuffer->aPage[i], 32768));
}
Tcl_SetObjResult(interp, pObj);
break; break;
} }

468
src/wal.c
View File

@ -370,8 +370,8 @@ struct Wal {
sqlite3_file *pDbFd; /* File handle for the database file */ sqlite3_file *pDbFd; /* File handle for the database file */
sqlite3_file *pWalFd; /* File handle for WAL file */ sqlite3_file *pWalFd; /* File handle for WAL file */
u32 iCallback; /* Value to pass to log callback (or 0) */ u32 iCallback; /* Value to pass to log callback (or 0) */
int szWIndex; /* Size of the wal-index that is mapped in mem */ int nWiData; /* Size of array apWiData */
volatile u32 *pWiData; /* Pointer to wal-index content in memory */ volatile u32 **apWiData; /* Pointer to wal-index content in memory */
u16 szPage; /* Database page size */ u16 szPage; /* Database page size */
i16 readLock; /* Which read lock is being held. -1 for none */ i16 readLock; /* Which read lock is being held. -1 for none */
u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */ u8 exclusiveMode; /* Non-zero if connection is in exclusive mode */
@ -386,15 +386,77 @@ struct Wal {
#endif #endif
}; };
/*
** Define the parameters of the hash tables in the wal-index file. There
** is a hash-table following every HASHTABLE_NPAGE page numbers in the
** wal-index.
**
** Changing any of these constants will alter the wal-index format and
** create incompatibilities.
*/
#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE u16
#define HASHTABLE_HASH_1 383 /* Should be prime */
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
/* The block of page numbers associated with the first hash-table in a
** wal-index is smaller than usual. This is so that there is a complete
** hash-table on each aligned 32KB page of the wal-index.
*/
#define HASHTABLE_NPAGE_ONE (4096 - (WALINDEX_HDR_SIZE/sizeof(u32)))
/* The wal-index is divided into pages of HASHTABLE_PAGESIZE bytes each. */
#define HASHTABLE_PAGESIZE (HASHTABLE_NBYTE + HASHTABLE_NPAGE*sizeof(u32))
/*
** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
** is broken into pages of HASHTABLE_PAGESIZE bytes. Wal-index pages are
** numbered from zero.
**
** If this call is successful, *ppPage is set to point to the wal-index
** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
** then an SQLite error code is returned and *ppPage is set to 0.
*/
static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
  int rc = SQLITE_OK;
  int nNeeded = iPage+1;          /* Slots required in pWal->apWiData[] */

  /* Grow pWal->apWiData[] so that slot iPage exists. Newly added slots
  ** are zeroed so they read as "not yet fetched from the VFS". */
  if( nNeeded>pWal->nWiData ){
    int nOld = pWal->nWiData;
    int nByte = sizeof(u32 *)*nNeeded;
    volatile u32 **apGrown;

    apGrown = (volatile u32 **)sqlite3_realloc(pWal->apWiData, nByte);
    if( apGrown==0 ){
      *ppPage = 0;
      return SQLITE_NOMEM;
    }
    memset(&apGrown[nOld], 0, sizeof(u32 *)*(nNeeded-nOld));
    pWal->apWiData = apGrown;
    pWal->nWiData = nNeeded;
  }

  /* Ask the VFS for the page only if no pointer is cached for it yet. */
  if( pWal->apWiData[iPage]==0 ){
    rc = sqlite3OsShmPage(
        pWal->pDbFd, iPage, HASHTABLE_PAGESIZE, pWal->writeLock,
        (void volatile **)&pWal->apWiData[iPage]
    );
  }

  *ppPage = pWal->apWiData[iPage];
  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
  return rc;
}
/* /*
** Return a pointer to the WalCkptInfo structure in the wal-index. ** Return a pointer to the WalCkptInfo structure in the wal-index.
*/ */
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
assert( pWal->pWiData!=0 ); volatile u32 *page1 = 0;
return (volatile WalCkptInfo*)&pWal->pWiData[sizeof(WalIndexHdr)/2]; walIndexPage(pWal, 0, &page1);
assert( page1 );
return (volatile WalCkptInfo*)&page1[sizeof(WalIndexHdr)/2];
} }
/* /*
** This structure is used to implement an iterator that loops through ** This structure is used to implement an iterator that loops through
** all frames in the WAL in database page order. Where two or more frames ** all frames in the WAL in database page order. Where two or more frames
@ -413,12 +475,13 @@ static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
struct WalIterator { struct WalIterator {
int iPrior; /* Last result returned from the iterator */ int iPrior; /* Last result returned from the iterator */
int nSegment; /* Size of the aSegment[] array */ int nSegment; /* Size of the aSegment[] array */
int nFinal; /* Elements in aSegment[nSegment-1] */
struct WalSegment { struct WalSegment {
int iNext; /* Next slot in aIndex[] not previously returned */ int iNext; /* Next slot in aIndex[] not yet returned */
u8 *aIndex; /* i0, i1, i2... such that aPgno[iN] ascending */ HASHTABLE_DATATYPE *aIndex; /* i0, i1, i2... such that aPgno[iN] ascend */
u32 *aPgno; /* 256 page numbers. Pointer to Wal.pWiData */ u32 *aPgno; /* Array of page numbers. */
} aSegment[1]; /* One for every 256 entries in the WAL */ int nEntry; /* Max size of aPgno[] and aIndex[] arrays */
int iZero; /* Frame number associated with aPgno[0] */
} aSegment[1]; /* One for every 32KB page in the WAL */
}; };
/* /*
@ -492,7 +555,7 @@ static void walIndexWriteHdr(Wal *pWal){
pWal->hdr.isInit = 1; pWal->hdr.isInit = 1;
walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum), walChecksumBytes(1, (u8*)&pWal->hdr, offsetof(WalIndexHdr, aCksum),
0, pWal->hdr.aCksum); 0, pWal->hdr.aCksum);
aHdr = (WalIndexHdr*)pWal->pWiData; walIndexPage(pWal, 0, (volatile u32 **)&aHdr);
memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr)); memcpy(&aHdr[1], &pWal->hdr, sizeof(WalIndexHdr));
sqlite3OsShmBarrier(pWal->pDbFd); sqlite3OsShmBarrier(pWal->pDbFd);
memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr)); memcpy(&aHdr[0], &pWal->hdr, sizeof(WalIndexHdr));
@ -586,19 +649,6 @@ static int walDecodeFrame(
return 1; return 1;
} }
/*
** Define the parameters of the hash tables in the wal-index file. There
** is a hash-table following every HASHTABLE_NPAGE page numbers in the
** wal-index.
**
** Changing any of these constants will alter the wal-index format and
** create incompatibilities.
*/
#define HASHTABLE_NPAGE 4096 /* Must be power of 2 and multiple of 256 */
#define HASHTABLE_DATATYPE u16
#define HASHTABLE_HASH_1 383 /* Should be prime */
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2) /* Must be a power of 2 */
#define HASHTABLE_NBYTE (sizeof(HASHTABLE_DATATYPE)*HASHTABLE_NSLOT)
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/* /*
@ -663,96 +713,6 @@ static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
walLockName(lockIdx), n)); walLockName(lockIdx), n));
} }
/*
** Return the index in the Wal.pWiData array that corresponds to
** frame iFrame.
**
** Wal.pWiData is an array of u32 elements that is the wal-index.
** The array begins with a header and is then followed by alternating
** "map" and "hash-table" blocks. Each "map" block consists of
** HASHTABLE_NPAGE u32 elements which are page numbers corresponding
** to frames in the WAL file.
**
** This routine returns an index X such that Wal.pWiData[X] is part
** of a "map" block that contains the page number of the iFrame-th
** frame in the WAL file.
*/
static int walIndexEntry(u32 iFrame){
return (
(WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)/sizeof(u32)
+ (((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NBYTE)/sizeof(u32)
+ (iFrame-1)
);
}
/*
** Return the minimum size of the shared-memory, in bytes, that is needed
** to support a wal-index containing frame iFrame. The value returned
** includes the wal-index header and the complete "block" containing iFrame,
** including the hash table segment that follows the block.
*/
static int walMappingSize(u32 iFrame){
const int nByte = (sizeof(u32)*HASHTABLE_NPAGE + HASHTABLE_NBYTE) ;
return ( WALINDEX_LOCK_OFFSET
+ WALINDEX_LOCK_RESERVED
+ nByte * ((iFrame + HASHTABLE_NPAGE - 1)/HASHTABLE_NPAGE)
);
}
/*
** Release our reference to the wal-index memory map, if we are holding
** it.
*/
static void walIndexUnmap(Wal *pWal){
if( pWal->pWiData ){
sqlite3OsShmRelease(pWal->pDbFd);
}
pWal->pWiData = 0;
pWal->szWIndex = -1;
}
/*
** Map the wal-index file into memory if it isn't already.
**
** The reqSize parameter is the requested size of the mapping. The
** mapping will be at least this big if the underlying storage is
** that big. But the mapping will never grow larger than the underlying
** storage. Use walIndexRemap() to enlarge the storage space.
*/
static int walIndexMap(Wal *pWal, int reqSize){
int rc = SQLITE_OK;
if( pWal->pWiData==0 || reqSize>pWal->szWIndex ){
walIndexUnmap(pWal);
rc = sqlite3OsShmGet(pWal->pDbFd, reqSize, &pWal->szWIndex,
(void volatile**)(char volatile*)&pWal->pWiData);
if( rc!=SQLITE_OK ){
walIndexUnmap(pWal);
}
}
return rc;
}
/*
** Enlarge the wal-index to be at least enlargeTo bytes in size and
** remap the wal-index so that the mapping covers the full size of the
** underlying file.
**
** If enlargeTo is non-negative, then the size of the underlying storage
** is increased to be at least as big as enlargeTo before remapping.
**
** The caller must hold the write lock (asserted). The mapping is only
** replaced when the size obtained from sqlite3OsShmSize() exceeds the
** size of the mapping currently recorded in pWal->szWIndex.
*/
static int walIndexRemap(Wal *pWal, int enlargeTo){
  int szStorage;          /* Size written back by sqlite3OsShmSize() */
  int rc;                 /* Return code */

  assert( pWal->writeLock );

  rc = sqlite3OsShmSize(pWal->pDbFd, enlargeTo, &szStorage);
  if( rc==SQLITE_OK ){
    if( szStorage>pWal->szWIndex ){
      /* The storage is larger than what is mapped: drop and re-map. */
      walIndexUnmap(pWal);
      rc = walIndexMap(pWal, szStorage);
    }
  }

  assert( rc!=SQLITE_OK || pWal->szWIndex>=enlargeTo );
  return rc;
}
/* /*
** Compute a hash on a page number. The resulting hash value must land ** Compute a hash on a page number. The resulting hash value must land
** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances ** between 0 and (HASHTABLE_NSLOT-1). The walHashNext() function advances
@ -767,6 +727,54 @@ static int walNextHash(int iPriorHash){
return (iPriorHash+1)&(HASHTABLE_NSLOT-1); return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
} }
/*
** Locate the iHash'th hash table of the wal-index and the page-number
** array that it indexes.
**
** On return:
**
**   *paHash  points at the hash-slot array, which starts HASHTABLE_NPAGE
**            u32 entries past the start of the wal-index page obtained
**            from walIndexPage().
**
**   *piZero  is 0 for the first table, otherwise
**            HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE.
**
**   *paPgno  is biased so that (*paPgno)[*piZero + 1] is the first
**            page-number entry stored on this wal-index page. For the
**            first table that entry lies WALINDEX_HDR_SIZE bytes into
**            the page; for later tables it is the first u32 of the page.
**
** NOTE(review): the result code of walIndexPage() is discarded here, so
** a failure to obtain the page goes unreported by this void routine.
** Callers presumably ensure the page is already available - confirm.
*/
static void walHashGet(
  Wal *pWal,                      /* WAL handle */
  int iHash,                      /* Find the iHash'th table */
  volatile HASHTABLE_DATATYPE **paHash,    /* OUT: Pointer to hash index */
  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
){
  u32 iZero;
  volatile u32 *aPgno;
  volatile HASHTABLE_DATATYPE *aHash;

  walIndexPage(pWal, iHash, &aPgno);

  /* The hash slots are located from the start of the page, before aPgno
  ** is re-biased below. */
  aHash = (volatile HASHTABLE_DATATYPE *)&aPgno[HASHTABLE_NPAGE];

  if( iHash==0 ){
    aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)-1];
    iZero = 0;
  }else{
    iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
    /* Step the pointer back by iZero+1 entries using pointer subtraction.
    ** Writing this as aPgno[-1*iZero-1] evaluates the index in unsigned
    ** 32-bit arithmetic (iZero is a u32), which on platforms with 64-bit
    ** pointers is a huge positive offset instead of a negative one. */
    aPgno = aPgno - iZero - 1;
  }

  *paPgno = aPgno;
  *paHash = aHash;
  *piZero = iZero;
}
/*
** Return the index of the wal-index page (and hence hash table) that
** covers frame iFrame.
**
** As the assert() below spells out, page 0 covers frames up to and
** including HASHTABLE_NPAGE_ONE, and each later page covers the next
** HASHTABLE_NPAGE frames.
*/
static int walFramePage(u32 iFrame){
int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
/* Cross-check the formula against the explicit frame ranges of pages
** 0, 1 and 2. */
assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
&& (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
&& (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
&& (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
&& (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
);
return iHash;
}
/*
** Look up the database page number recorded in the wal-index for WAL
** frame iFrame.
**
** walFramePage() selects which wal-index page holds the entry. On
** page 0 the entries begin WALINDEX_HDR_SIZE bytes into the page; on
** every other page they begin at the first u32 of the page.
*/
static u32 walFramePgno(Wal *pWal, u32 iFrame){
  const int iPage = walFramePage(iFrame);

  if( iPage!=0 ){
    /* Later pages: index relative to the first frame after page 0. */
    return pWal->apWiData[iPage][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
  }

  /* First page: skip over the header that precedes the entries. */
  return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
}
/* /*
** Find the hash table and (section of the) page number array used to ** Find the hash table and (section of the) page number array used to
@ -789,27 +797,8 @@ static void walHashFind(
volatile u32 **paPgno, /* OUT: Pointer to page number array */ volatile u32 **paPgno, /* OUT: Pointer to page number array */
u32 *piZero /* OUT: Frame associated with *paPgno[0] */ u32 *piZero /* OUT: Frame associated with *paPgno[0] */
){ ){
u32 iZero; int iHash = walFramePage(iFrame);
volatile u32 *aPgno; walHashGet(pWal, iHash, paHash, paPgno, piZero);
volatile HASHTABLE_DATATYPE *aHash;
iZero = ((iFrame-1)/HASHTABLE_NPAGE) * HASHTABLE_NPAGE;
aPgno = &pWal->pWiData[walIndexEntry(iZero+1)-iZero-1];
aHash = (HASHTABLE_DATATYPE *)&aPgno[iZero+HASHTABLE_NPAGE+1];
/* Assert that:
**
** + the mapping is large enough for this hash-table, and
**
** + that aPgno[iZero+1] really is the database page number associated
** with the first frame indexed by this hash table.
*/
assert( (u32*)(&aHash[HASHTABLE_NSLOT])<=&pWal->pWiData[pWal->szWIndex/4] );
assert( walIndexEntry(iZero+1)==(&aPgno[iZero+1] - pWal->pWiData) );
*paHash = aHash;
*paPgno = aPgno;
*piZero = iZero;
} }
/* /*
@ -829,16 +818,16 @@ static void walCleanupHash(Wal *pWal){
volatile u32 *aPgno; /* Unused return from walHashFind() */ volatile u32 *aPgno; /* Unused return from walHashFind() */
u32 iZero; /* frame == (aHash[x]+iZero) */ u32 iZero; /* frame == (aHash[x]+iZero) */
int iLimit = 0; /* Zero values greater than this */ int iLimit = 0; /* Zero values greater than this */
int nByte; /* Number of bytes to zero in aPgno[] */
int i; /* Used to iterate through aHash[] */
assert( pWal->writeLock ); assert( pWal->writeLock );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE-1 );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 ); testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE+1 );
if( (pWal->hdr.mxFrame % HASHTABLE_NPAGE)>0 ){
int nByte; /* Number of bytes to zero in aPgno[] */
int i; /* Used to iterate through aHash[] */
walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero); walHashFind(pWal, pWal->hdr.mxFrame+1, &aHash, &aPgno, &iZero);
if( iZero!=pWal->hdr.mxFrame ){
iLimit = pWal->hdr.mxFrame - iZero; iLimit = pWal->hdr.mxFrame - iZero;
assert( iLimit>0 ); assert( iLimit>0 );
for(i=0; i<HASHTABLE_NSLOT; i++){ for(i=0; i<HASHTABLE_NSLOT; i++){
@ -850,9 +839,8 @@ static void walCleanupHash(Wal *pWal){
/* Zero the entries in the aPgno array that correspond to frames with /* Zero the entries in the aPgno array that correspond to frames with
** frame numbers greater than pWal->hdr.mxFrame. ** frame numbers greater than pWal->hdr.mxFrame.
*/ */
nByte = sizeof(u32) * (HASHTABLE_NPAGE-iLimit); nByte = ((char *)aHash - (char *)&aPgno[pWal->hdr.mxFrame+1]);
memset((void *)&aPgno[iZero+iLimit+1], 0, nByte); memset((void *)&aPgno[pWal->hdr.mxFrame+1], 0, nByte);
assert( &((u8 *)&aPgno[iZero+iLimit+1])[nByte]==(u8 *)aHash );
} }
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
@ -878,15 +866,7 @@ static void walCleanupHash(Wal *pWal){
** pPage into WAL frame iFrame. ** pPage into WAL frame iFrame.
*/ */
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
int rc; /* Return code */ int rc = SQLITE_OK; /* Return code */
int nMapping; /* Required mapping size in bytes */
/* Make sure the wal-index is mapped. Enlarge the mapping if required. */
nMapping = walMappingSize(iFrame);
rc = walIndexMap(pWal, nMapping);
while( rc==SQLITE_OK && nMapping>pWal->szWIndex ){
rc = walIndexRemap(pWal, nMapping);
}
/* Assuming the wal-index file was successfully mapped, find the hash /* Assuming the wal-index file was successfully mapped, find the hash
** table and section of the page number array that pertain to frame ** table and section of the page number array that pertain to frame
@ -904,8 +884,8 @@ static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero); walHashFind(pWal, iFrame, &aHash, &aPgno, &iZero);
idx = iFrame - iZero; idx = iFrame - iZero;
if( idx==1 ){ if( idx==1 ){
memset((void*)&aPgno[iZero+1], 0, HASHTABLE_NPAGE*sizeof(u32)); int nByte = (u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1+iZero];
memset((void*)aHash, 0, HASHTABLE_NBYTE); memset((void*)&aPgno[1+iZero], 0, nByte);
} }
assert( idx <= HASHTABLE_NSLOT/2 + 1 ); assert( idx <= HASHTABLE_NSLOT/2 + 1 );
@ -1076,9 +1056,6 @@ static int walIndexRecover(Wal *pWal){
} }
finished: finished:
if( rc==SQLITE_OK && pWal->hdr.mxFrame==0 ){
rc = walIndexRemap(pWal, walMappingSize(1));
}
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
volatile WalCkptInfo *pInfo; volatile WalCkptInfo *pInfo;
int i; int i;
@ -1164,7 +1141,6 @@ int sqlite3WalOpen(
pRet->pVfs = pVfs; pRet->pVfs = pVfs;
pRet->pWalFd = (sqlite3_file *)&pRet[1]; pRet->pWalFd = (sqlite3_file *)&pRet[1];
pRet->pDbFd = pDbFd; pRet->pDbFd = pDbFd;
pRet->szWIndex = -1;
pRet->readLock = -1; pRet->readLock = -1;
sqlite3_randomness(8, &pRet->hdr.aSalt); sqlite3_randomness(8, &pRet->hdr.aSalt);
pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd; pRet->zWalName = zWal = pVfs->szOsFile + (char*)pRet->pWalFd;
@ -1207,24 +1183,22 @@ static int walIteratorNext(
u32 iMin; /* Result pgno must be greater than iMin */ u32 iMin; /* Result pgno must be greater than iMin */
u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */ u32 iRet = 0xFFFFFFFF; /* 0xffffffff is never a valid page number */
int i; /* For looping through segments */ int i; /* For looping through segments */
int nBlock = p->nFinal; /* Number of entries in current segment */
iMin = p->iPrior; iMin = p->iPrior;
assert( iMin<0xffffffff ); assert( iMin<0xffffffff );
for(i=p->nSegment-1; i>=0; i--){ for(i=p->nSegment-1; i>=0; i--){
struct WalSegment *pSegment = &p->aSegment[i]; struct WalSegment *pSegment = &p->aSegment[i];
while( pSegment->iNext<nBlock ){ while( pSegment->iNext<pSegment->nEntry ){
u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]]; u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]];
if( iPg>iMin ){ if( iPg>iMin ){
if( iPg<iRet ){ if( iPg<iRet ){
iRet = iPg; iRet = iPg;
*piFrame = i*256 + 1 + pSegment->aIndex[pSegment->iNext]; *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext];
} }
break; break;
} }
pSegment->iNext++; pSegment->iNext++;
} }
nBlock = 256;
} }
*piPage = p->iPrior = iRet; *piPage = p->iPrior = iRet;
@ -1232,28 +1206,28 @@ static int walIteratorNext(
} }
static void walMergesort8( static void walMergesort(
Pgno *aContent, /* Pages in wal */ u32 *aContent, /* Pages in wal */
u8 *aBuffer, /* Buffer of at least *pnList items to use */ HASHTABLE_DATATYPE *aBuffer, /* Buffer of at least *pnList items to use */
u8 *aList, /* IN/OUT: List to sort */ HASHTABLE_DATATYPE *aList, /* IN/OUT: List to sort */
int *pnList /* IN/OUT: Number of elements in aList[] */ int *pnList /* IN/OUT: Number of elements in aList[] */
){ ){
int nList = *pnList; int nList = *pnList;
if( nList>1 ){ if( nList>1 ){
int nLeft = nList / 2; /* Elements in left list */ int nLeft = nList / 2; /* Elements in left list */
int nRight = nList - nLeft; /* Elements in right list */ int nRight = nList - nLeft; /* Elements in right list */
u8 *aLeft = aList; /* Left list */
u8 *aRight = &aList[nLeft]; /* Right list */
int iLeft = 0; /* Current index in aLeft */ int iLeft = 0; /* Current index in aLeft */
int iRight = 0; /* Current index in aright */ int iRight = 0; /* Current index in aright */
int iOut = 0; /* Current index in output buffer */ int iOut = 0; /* Current index in output buffer */
HASHTABLE_DATATYPE *aLeft = aList; /* Left list */
HASHTABLE_DATATYPE *aRight = &aList[nLeft]; /* Right list */
/* TODO: Change to non-recursive version. */ /* TODO: Change to non-recursive version. */
walMergesort8(aContent, aBuffer, aLeft, &nLeft); walMergesort(aContent, aBuffer, aLeft, &nLeft);
walMergesort8(aContent, aBuffer, aRight, &nRight); walMergesort(aContent, aBuffer, aRight, &nRight);
while( iRight<nRight || iLeft<nLeft ){ while( iRight<nRight || iLeft<nLeft ){
u8 logpage; HASHTABLE_DATATYPE logpage;
Pgno dbpage; Pgno dbpage;
if( (iLeft<nLeft) if( (iLeft<nLeft)
@ -1300,60 +1274,68 @@ static void walMergesort8(
** prior to the WalIterator object being destroyed. ** prior to the WalIterator object being destroyed.
*/ */
static int walIteratorInit(Wal *pWal, WalIterator **pp){ static int walIteratorInit(Wal *pWal, WalIterator **pp){
u32 *aData; /* Content of the wal-index file */
WalIterator *p; /* Return value */ WalIterator *p; /* Return value */
int nSegment; /* Number of segments to merge */ int nSegment; /* Number of segments to merge */
u32 iLast; /* Last frame in log */ u32 iLast; /* Last frame in log */
int nByte; /* Number of bytes to allocate */ int nByte; /* Number of bytes to allocate */
int i; /* Iterator variable */ int i; /* Iterator variable */
int nFinal; /* Number of unindexed entries */ HASHTABLE_DATATYPE *aTmp; /* Temp space used by merge-sort */
u8 *aTmp; /* Temp space used by merge-sort */ HASHTABLE_DATATYPE *aSpace; /* Space at the end of the allocation */
u8 *aSpace; /* Surplus space on the end of the allocation */
/* Make sure the wal-index is mapped into local memory */
assert( pWal->pWiData && pWal->szWIndex>=walMappingSize(pWal->hdr.mxFrame) );
/* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other /* This routine only runs while holding SQLITE_SHM_CHECKPOINT. No other
** thread is able to write to shared memory while this routine is ** thread is able to write to shared memory while this routine is
** running (or, indeed, while the WalIterator object exists). Hence, ** running (or, indeed, while the WalIterator object exists). Hence,
** we can cast off the volatile qualifacation from shared memory ** we can cast off the volatile qualification from shared memory
*/ */
assert( pWal->ckptLock ); assert( pWal->ckptLock );
aData = (u32*)pWal->pWiData; iLast = pWal->hdr.mxFrame;
/* Allocate space for the WalIterator object */ /* Allocate space for the WalIterator object */
iLast = pWal->hdr.mxFrame; nSegment = walFramePage(iLast) + 1;
nSegment = (iLast >> 8) + 1; nByte = sizeof(WalIterator)
nFinal = (iLast & 0x000000FF); + nSegment*(sizeof(struct WalSegment))
nByte = sizeof(WalIterator) + (nSegment+1)*(sizeof(struct WalSegment)+256); + (nSegment+1)*(HASHTABLE_NPAGE * sizeof(HASHTABLE_DATATYPE));
p = (WalIterator *)sqlite3_malloc(nByte); p = (WalIterator *)sqlite3_malloc(nByte);
if( !p ){ if( !p ){
return SQLITE_NOMEM; return SQLITE_NOMEM;
} }
memset(p, 0, nByte); memset(p, 0, nByte);
/* Initialize the WalIterator object. Each 256-entry segment is /* Allocate space for the WalIterator object */
** presorted in order to make iterating through all entries much
** faster.
*/
p->nSegment = nSegment; p->nSegment = nSegment;
aSpace = (u8 *)&p->aSegment[nSegment]; aSpace = (HASHTABLE_DATATYPE *)&p->aSegment[nSegment];
aTmp = &aSpace[nSegment*256]; aTmp = &aSpace[HASHTABLE_NPAGE*nSegment];
for(i=0; i<nSegment; i++){ for(i=0; i<nSegment; i++){
volatile HASHTABLE_DATATYPE *pDummy;
int j; int j;
int nIndex = (i==nSegment-1) ? nFinal : 256; u32 iZero;
p->aSegment[i].aPgno = &aData[walIndexEntry(i*256+1)]; int nEntry;
p->aSegment[i].aIndex = aSpace; volatile u32 *aPgno;
for(j=0; j<nIndex; j++){
walHashGet(pWal, i, &pDummy, &aPgno, &iZero);
if( i==(nSegment-1) ){
nEntry = iLast - iZero;
}else if( i==0 ){
nEntry = HASHTABLE_NPAGE_ONE;
}else{
nEntry = HASHTABLE_NPAGE;
}
iZero++;
aPgno += iZero;
for(j=0; j<nEntry; j++){
aSpace[j] = j; aSpace[j] = j;
} }
walMergesort8(p->aSegment[i].aPgno, aTmp, aSpace, &nIndex); walMergesort((u32 *)aPgno, aTmp, aSpace, &nEntry);
memset(&aSpace[nIndex], aSpace[nIndex-1], 256-nIndex); p->aSegment[i].iZero = iZero;
aSpace += 256; p->aSegment[i].nEntry = nEntry;
p->nFinal = nIndex; p->aSegment[i].aIndex = aSpace;
p->aSegment[i].aPgno = (u32 *)aPgno;
aSpace += HASHTABLE_NPAGE;
} }
assert( aSpace==aTmp );
/* Return the fully initializd WalIterator object */ /* Return the fully initialized WalIterator object */
*pp = p; *pp = p;
return SQLITE_OK ; return SQLITE_OK ;
} }
@ -1430,8 +1412,8 @@ static int walCheckpoint(
** cannot be backfilled from the WAL. ** cannot be backfilled from the WAL.
*/ */
mxSafeFrame = pWal->hdr.mxFrame; mxSafeFrame = pWal->hdr.mxFrame;
pHdr = (volatile WalIndexHdr*)pWal->pWiData; walIndexPage(pWal, 0, (volatile u32 **)&pHdr);
pInfo = (volatile WalCkptInfo*)&pHdr[2]; pInfo = walCkptInfo(pWal);
assert( pInfo==walCkptInfo(pWal) ); assert( pInfo==walCkptInfo(pWal) );
for(i=1; i<WAL_NREADER; i++){ for(i=1; i<WAL_NREADER; i++){
u32 y = pInfo->aReadMark[i]; u32 y = pInfo->aReadMark[i];
@ -1461,6 +1443,7 @@ static int walCheckpoint(
/* Iterate through the contents of the WAL, copying data to the db file. */ /* Iterate through the contents of the WAL, copying data to the db file. */
while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
assert( walFramePgno(pWal, iFrame)==iDbpage );
if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue; if( iFrame<=nBackfill || iFrame>mxSafeFrame ) continue;
rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage,
walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE
@ -1525,7 +1508,6 @@ int sqlite3WalClose(
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
isDelete = 1; isDelete = 1;
} }
walIndexUnmap(pWal);
} }
walIndexClose(pWal, isDelete); walIndexClose(pWal, isDelete);
@ -1534,6 +1516,7 @@ int sqlite3WalClose(
sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0); sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
} }
WALTRACE(("WAL%p: closed\n", pWal)); WALTRACE(("WAL%p: closed\n", pWal));
sqlite3_free(pWal->apWiData);
sqlite3_free(pWal); sqlite3_free(pWal);
} }
return rc; return rc;
@ -1560,13 +1543,14 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
u32 aCksum[2]; /* Checksum on the header content */ u32 aCksum[2]; /* Checksum on the header content */
WalIndexHdr h1, h2; /* Two copies of the header content */ WalIndexHdr h1, h2; /* Two copies of the header content */
WalIndexHdr *aHdr; /* Header in shared memory */ WalIndexHdr *aHdr; /* Header in shared memory */
volatile u32 *page1 = 0;
if( pWal->szWIndex < WALINDEX_HDR_SIZE ){ walIndexPage(pWal, 0, &page1);
if( !page1 ){
/* The wal-index is not large enough to hold the header, then assume /* The wal-index is not large enough to hold the header, then assume
** header is invalid. */ ** header is invalid. */
return 1; return 1;
} }
assert( pWal->pWiData );
/* Read the header. This might happen concurrently with a write to the /* Read the header. This might happen concurrently with a write to the
** same area of shared memory on a different CPU in a SMP, ** same area of shared memory on a different CPU in a SMP,
@ -1578,7 +1562,7 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
** Memory barriers are used to prevent the compiler or the hardware from ** Memory barriers are used to prevent the compiler or the hardware from
** reordering the reads and writes. ** reordering the reads and writes.
*/ */
aHdr = (WalIndexHdr*)pWal->pWiData; aHdr = (WalIndexHdr*)page1;
memcpy(&h1, &aHdr[0], sizeof(h1)); memcpy(&h1, &aHdr[0], sizeof(h1));
sqlite3OsShmBarrier(pWal->pDbFd); sqlite3OsShmBarrier(pWal->pDbFd);
memcpy(&h2, &aHdr[1], sizeof(h2)); memcpy(&h2, &aHdr[1], sizeof(h2));
@ -1625,9 +1609,10 @@ int walIndexTryHdr(Wal *pWal, int *pChanged){
static int walIndexReadHdr(Wal *pWal, int *pChanged){ static int walIndexReadHdr(Wal *pWal, int *pChanged){
int rc; /* Return code */ int rc; /* Return code */
int badHdr; /* True if a header read failed */ int badHdr; /* True if a header read failed */
volatile u32 *dummy;
assert( pChanged ); assert( pChanged );
rc = walIndexMap(pWal, walMappingSize(1)); rc = walIndexPage(pWal, 0, &dummy);
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
@ -1659,14 +1644,6 @@ static int walIndexReadHdr(Wal *pWal, int *pChanged){
} }
} }
/* Make sure the mapping is large enough to cover the entire wal-index */
if( rc==SQLITE_OK ){
int szWanted = walMappingSize(pWal->hdr.mxFrame);
if( pWal->szWIndex<szWanted ){
rc = walIndexMap(pWal, szWanted);
}
}
return rc; return rc;
} }
@ -1710,7 +1687,7 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
u32 mxReadMark; /* Largest aReadMark[] value */ u32 mxReadMark; /* Largest aReadMark[] value */
int mxI; /* Index of largest aReadMark[] value */ int mxI; /* Index of largest aReadMark[] value */
int i; /* Loop counter */ int i; /* Loop counter */
int rc; /* Return code */ int rc = SQLITE_OK; /* Return code */
assert( pWal->readLock<0 ); /* Not currently locked */ assert( pWal->readLock<0 ); /* Not currently locked */
@ -1739,16 +1716,14 @@ static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
rc = SQLITE_BUSY_RECOVERY; rc = SQLITE_BUSY_RECOVERY;
} }
} }
}else{
rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
} }
if( rc!=SQLITE_OK ){ if( rc!=SQLITE_OK ){
return rc; return rc;
} }
pHdr = (volatile WalIndexHdr*)pWal->pWiData; walIndexPage(pWal, 0, (volatile u32 **)&pHdr);
pInfo = (volatile WalCkptInfo*)&pHdr[2]; pInfo = walCkptInfo(pWal);
assert( pInfo==walCkptInfo(pWal) ); assert( pInfo==(volatile WalCkptInfo *)&pHdr[2] );
if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){ if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
/* The WAL has been completely backfilled (or it is empty). /* The WAL has been completely backfilled (or it is empty).
** and can be safely ignored. ** and can be safely ignored.
@ -1883,7 +1858,6 @@ int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
do{ do{
rc = walTryBeginRead(pWal, pChanged, 0, ++cnt); rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
}while( rc==WAL_RETRY ); }while( rc==WAL_RETRY );
walIndexUnmap(pWal);
return rc; return rc;
} }
@ -1913,7 +1887,6 @@ int sqlite3WalRead(
int nOut, /* Size of buffer pOut in bytes */ int nOut, /* Size of buffer pOut in bytes */
u8 *pOut /* Buffer to write page data to */ u8 *pOut /* Buffer to write page data to */
){ ){
int rc; /* Return code */
u32 iRead = 0; /* If !=0, WAL frame to return data from */ u32 iRead = 0; /* If !=0, WAL frame to return data from */
u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */ u32 iLast = pWal->hdr.mxFrame; /* Last page in WAL for this reader */
int iHash; /* Used to loop through N hash tables */ int iHash; /* Used to loop through N hash tables */
@ -1932,12 +1905,6 @@ int sqlite3WalRead(
return SQLITE_OK; return SQLITE_OK;
} }
/* Ensure the wal-index is mapped. */
rc = walIndexMap(pWal, walMappingSize(iLast));
if( rc!=SQLITE_OK ){
return rc;
}
/* Search the hash table or tables for an entry matching page number /* Search the hash table or tables for an entry matching page number
** pgno. Each iteration of the following for() loop searches one ** pgno. Each iteration of the following for() loop searches one
** hash table (each hash table indexes up to HASHTABLE_NPAGE frames). ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
@ -1963,16 +1930,13 @@ int sqlite3WalRead(
** This condition filters out entries that were added to the hash ** This condition filters out entries that were added to the hash
** table after the current read-transaction had started. ** table after the current read-transaction had started.
*/ */
for(iHash=iLast; iHash>0 && iRead==0; iHash-=HASHTABLE_NPAGE){ for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */ volatile HASHTABLE_DATATYPE *aHash; /* Pointer to hash table */
volatile u32 *aPgno; /* Pointer to array of page numbers */ volatile u32 *aPgno; /* Pointer to array of page numbers */
u32 iZero; /* Frame number corresponding to aPgno[0] */ u32 iZero; /* Frame number corresponding to aPgno[0] */
int iKey; /* Hash slot index */ int iKey; /* Hash slot index */
int mxHash; /* upper bound on aHash[] values */
walHashFind(pWal, iHash, &aHash, &aPgno, &iZero); walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
mxHash = iLast - iZero;
if( mxHash > HASHTABLE_NPAGE ) mxHash = HASHTABLE_NPAGE;
for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){ for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
u32 iFrame = aHash[iKey] + iZero; u32 iFrame = aHash[iKey] + iZero;
if( iFrame<=iLast && aPgno[iFrame]==pgno ){ if( iFrame<=iLast && aPgno[iFrame]==pgno ){
@ -1981,7 +1945,6 @@ int sqlite3WalRead(
} }
} }
} }
assert( iRead==0 || pWal->pWiData[walIndexEntry(iRead)]==pgno );
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT #ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* If expensive assert() statements are available, do a linear search /* If expensive assert() statements are available, do a linear search
@ -1991,7 +1954,7 @@ int sqlite3WalRead(
u32 iRead2 = 0; u32 iRead2 = 0;
u32 iTest; u32 iTest;
for(iTest=iLast; iTest>0; iTest--){ for(iTest=iLast; iTest>0; iTest--){
if( pWal->pWiData[walIndexEntry(iTest)]==pgno ){ if( walFramePgno(pWal, iTest)==pgno ){
iRead2 = iTest; iRead2 = iTest;
break; break;
} }
@ -2003,7 +1966,6 @@ int sqlite3WalRead(
/* If iRead is non-zero, then it is the log frame number that contains the /* If iRead is non-zero, then it is the log frame number that contains the
** required page. Read and return data from the log file. ** required page. Read and return data from the log file.
*/ */
walIndexUnmap(pWal);
if( iRead ){ if( iRead ){
i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE; i64 iOffset = walFrameOffset(iRead, pWal->hdr.szPage) + WAL_FRAME_HDRSIZE;
*pInWal = 1; *pInWal = 1;
@ -2039,6 +2001,7 @@ void sqlite3WalDbsize(Wal *pWal, Pgno *pPgno){
*/ */
int sqlite3WalBeginWriteTransaction(Wal *pWal){ int sqlite3WalBeginWriteTransaction(Wal *pWal){
int rc; int rc;
volatile u32 *page1;
/* Cannot start a write transaction without first holding a read /* Cannot start a write transaction without first holding a read
** transaction. */ ** transaction. */
@ -2057,19 +2020,13 @@ int sqlite3WalBeginWriteTransaction(Wal *pWal){
** time the read transaction on this connection was started, then ** time the read transaction on this connection was started, then
** the write is disallowed. ** the write is disallowed.
*/ */
rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)); walIndexPage(pWal, 0, &page1);
if( rc ){ if( memcmp(&pWal->hdr, (void*)page1, sizeof(WalIndexHdr))!=0 ){
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
pWal->writeLock = 0;
return rc;
}
if( memcmp(&pWal->hdr, (void*)pWal->pWiData, sizeof(WalIndexHdr))!=0 ){
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1); walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
pWal->writeLock = 0; pWal->writeLock = 0;
rc = SQLITE_BUSY; rc = SQLITE_BUSY;
} }
walIndexUnmap(pWal);
return rc; return rc;
} }
@ -2102,11 +2059,7 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
Pgno iMax = pWal->hdr.mxFrame; Pgno iMax = pWal->hdr.mxFrame;
Pgno iFrame; Pgno iFrame;
assert( pWal->pWiData==0 );
rc = walIndexReadHdr(pWal, &unused); rc = walIndexReadHdr(pWal, &unused);
if( rc==SQLITE_OK ){
rc = walIndexMap(pWal, walMappingSize(iMax));
}
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
for(iFrame=pWal->hdr.mxFrame+1; for(iFrame=pWal->hdr.mxFrame+1;
ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; ALWAYS(rc==SQLITE_OK) && iFrame<=iMax;
@ -2124,12 +2077,11 @@ int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
** committed. As a result, the call to xUndo may not fail. ** committed. As a result, the call to xUndo may not fail.
*/ */
assert( pWal->writeLock ); assert( pWal->writeLock );
assert( pWal->pWiData[walIndexEntry(iFrame)]!=1 ); assert( walFramePgno(pWal, iFrame)!=1 );
rc = xUndo(pUndoCtx, pWal->pWiData[walIndexEntry(iFrame)]); rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
} }
walCleanupHash(pWal); walCleanupHash(pWal);
} }
walIndexUnmap(pWal);
} }
return rc; return rc;
} }
@ -2170,7 +2122,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
} }
if( aWalData[0]<pWal->hdr.mxFrame ){ if( aWalData[0]<pWal->hdr.mxFrame ){
rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame));
pWal->hdr.mxFrame = aWalData[0]; pWal->hdr.mxFrame = aWalData[0];
pWal->hdr.aFrameCksum[0] = aWalData[1]; pWal->hdr.aFrameCksum[0] = aWalData[1];
pWal->hdr.aFrameCksum[1] = aWalData[2]; pWal->hdr.aFrameCksum[1] = aWalData[2];
@ -2179,7 +2130,6 @@ int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
} }
} }
walIndexUnmap(pWal);
return rc; return rc;
} }
@ -2199,9 +2149,7 @@ static int walRestartLog(Wal *pWal){
int rc = SQLITE_OK; int rc = SQLITE_OK;
int cnt; int cnt;
if( pWal->readLock==0 if( pWal->readLock==0 ){
&& SQLITE_OK==(rc = walIndexMap(pWal, walMappingSize(pWal->hdr.mxFrame)))
){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal); volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
assert( pInfo->nBackfill==pWal->hdr.mxFrame ); assert( pInfo->nBackfill==pWal->hdr.mxFrame );
if( pInfo->nBackfill>0 ){ if( pInfo->nBackfill>0 ){
@ -2237,11 +2185,6 @@ static int walRestartLog(Wal *pWal){
int notUsed; int notUsed;
rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt); rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
}while( rc==WAL_RETRY ); }while( rc==WAL_RETRY );
/* Unmap the wal-index before returning. Otherwise the VFS layer may
** hold a mutex for the duration of the IO performed by WalFrames().
*/
walIndexUnmap(pWal);
} }
return rc; return rc;
} }
@ -2267,7 +2210,6 @@ int sqlite3WalFrames(
assert( pList ); assert( pList );
assert( pWal->writeLock ); assert( pWal->writeLock );
assert( pWal->pWiData==0 );
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG) #if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
{ int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){} { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
@ -2280,10 +2222,8 @@ int sqlite3WalFrames(
** log file, instead of appending to it at pWal->hdr.mxFrame. ** log file, instead of appending to it at pWal->hdr.mxFrame.
*/ */
if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){ if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
assert( pWal->pWiData==0 );
return rc; return rc;
} }
assert( pWal->pWiData==0 && pWal->readLock>0 );
/* If this is the first frame written into the log, write the WAL /* If this is the first frame written into the log, write the WAL
** header to the start of the WAL file. See comments at the top of ** header to the start of the WAL file. See comments at the top of
@ -2358,7 +2298,6 @@ int sqlite3WalFrames(
rc = sqlite3OsSync(pWal->pWalFd, sync_flags); rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
} }
assert( pWal->pWiData==0 );
/* Append data to the wal-index. It is not necessary to lock the /* Append data to the wal-index. It is not necessary to lock the
** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
@ -2391,7 +2330,6 @@ int sqlite3WalFrames(
} }
} }
walIndexUnmap(pWal);
WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok")); WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
return rc; return rc;
} }
@ -2412,7 +2350,6 @@ int sqlite3WalCheckpoint(
int rc; /* Return code */ int rc; /* Return code */
int isChanged = 0; /* True if a new wal-index header is loaded */ int isChanged = 0; /* True if a new wal-index header is loaded */
assert( pWal->pWiData==0 );
assert( pWal->ckptLock==0 ); assert( pWal->ckptLock==0 );
WALTRACE(("WAL%p: checkpoint begins\n", pWal)); WALTRACE(("WAL%p: checkpoint begins\n", pWal));
@ -2441,7 +2378,6 @@ int sqlite3WalCheckpoint(
} }
/* Release the locks. */ /* Release the locks. */
walIndexUnmap(pWal);
walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
pWal->ckptLock = 0; pWal->ckptLock = 0;
WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok")); WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));

View File

@ -13,6 +13,7 @@
set testdir [file dirname $argv0] set testdir [file dirname $argv0]
source $testdir/tester.tcl source $testdir/tester.tcl
db close
# Argument processing. # Argument processing.
# #

View File

@ -75,9 +75,14 @@ proc incr_tvfs_hdr {file idx incrval} {
# database content. # database content.
# #
do_test wal2-1.0 { do_test wal2-1.0 {
proc tvfs_cb {method args} { return SQLITE_OK } proc tvfs_cb {method filename args} {
set ::filename $filename
return SQLITE_OK
}
testvfs tvfs testvfs tvfs
tvfs script tvfs_cb tvfs script tvfs_cb
tvfs filter xShmOpen
sqlite3 db test.db -vfs tvfs sqlite3 db test.db -vfs tvfs
sqlite3 db2 test.db -vfs tvfs sqlite3 db2 test.db -vfs tvfs
@ -123,21 +128,15 @@ foreach {tn iInsert res wal_index_hdr_mod wal_locks} "
do_test wal2-1.$tn.1 { do_test wal2-1.$tn.1 {
execsql { INSERT INTO t1 VALUES($iInsert) } execsql { INSERT INTO t1 VALUES($iInsert) }
set ::locks [list] set ::locks [list]
set ::cb_done 0
proc tvfs_cb {method args} { proc tvfs_cb {method args} {
if {$::cb_done == 0 && $method == "xShmGet"} { lappend ::locks [lindex $args 2]
set ::cb_done 1
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr [lindex $args 0] $::wal_index_hdr_mod 1
}
}
if {$method == "xShmLock"} { lappend ::locks [lindex $args 2] }
return SQLITE_OK return SQLITE_OK
} }
tvfs filter xShmLock
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1
}
execsql { SELECT count(a), sum(a) FROM t1 } db2 execsql { SELECT count(a), sum(a) FROM t1 } db2
} $res } $res
@ -174,8 +173,9 @@ do_test wal2-2.0 {
testvfs tvfs testvfs tvfs
tvfs script tvfs_cb tvfs script tvfs_cb
tvfs filter xShmOpen
proc tvfs_cb {method args} { proc tvfs_cb {method args} {
if {$method == "xShmOpen"} { set ::shm_file [lindex $args 0] } set ::filename [lindex $args 0]
return SQLITE_OK return SQLITE_OK
} }
@ -208,32 +208,28 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} {
8 11 {10 55} {11 66} 6 8 11 {10 55} {11 66} 6
9 12 {11 66} {12 78} 7 9 12 {11 66} {12 78} 7
} { } {
tvfs filter xShmLock
do_test wal2-2.$tn.1 { do_test wal2-2.$tn.1 {
set oldhdr [set_tvfs_hdr $::shm_file] set oldhdr [set_tvfs_hdr $::filename]
execsql { INSERT INTO t1 VALUES($iInsert) } execsql { INSERT INTO t1 VALUES($iInsert) }
execsql { SELECT count(a), sum(a) FROM t1 } execsql { SELECT count(a), sum(a) FROM t1 }
} $res1 } $res1
do_test wal2-2.$tn.2 { do_test wal2-2.$tn.2 {
set ::locks [list] set ::locks [list]
set ::cb_done 0
proc tvfs_cb {method args} { proc tvfs_cb {method args} {
if {$::cb_done == 0 && $method == "xShmGet"} {
set ::cb_done 1
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1
}
}
if {$method == "xShmLock"} {
set lock [lindex $args 2] set lock [lindex $args 2]
lappend ::locks $lock lappend ::locks $lock
if {$lock == $::WRITER} { if {$lock == $::WRITER} {
set_tvfs_hdr $::shm_file $::oldhdr set_tvfs_hdr $::filename $::oldhdr
}
} }
return SQLITE_OK return SQLITE_OK
} }
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1
}
execsql { SELECT count(a), sum(a) FROM t1 } db2 execsql { SELECT count(a), sum(a) FROM t1 } db2
} $res0 } $res0
@ -243,21 +239,15 @@ foreach {tn iInsert res0 res1 wal_index_hdr_mod} {
do_test wal2-2.$tn.4 { do_test wal2-2.$tn.4 {
set ::locks [list] set ::locks [list]
set ::cb_done 0
proc tvfs_cb {method args} { proc tvfs_cb {method args} {
if {$::cb_done == 0 && $method == "xShmGet"} {
set ::cb_done 1
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::shm_file $::wal_index_hdr_mod 1
}
}
if {$method == "xShmLock"} {
set lock [lindex $args 2] set lock [lindex $args 2]
lappend ::locks $lock lappend ::locks $lock
}
return SQLITE_OK return SQLITE_OK
} }
if {$::wal_index_hdr_mod >= 0} {
incr_tvfs_hdr $::filename $::wal_index_hdr_mod 1
}
execsql { SELECT count(a), sum(a) FROM t1 } db2 execsql { SELECT count(a), sum(a) FROM t1 } db2
} $res1 } $res1
} }

View File

@ -353,7 +353,7 @@ T script method_callback
proc method_callback {method args} { proc method_callback {method args} {
if {$method == "xShmBarrier"} { if {$method == "xShmBarrier"} {
incr ::barrier_count incr ::barrier_count
if {$::barrier_count == 1} { if {$::barrier_count == 2} {
# This code is executed within the xShmBarrier() callback invoked # This code is executed within the xShmBarrier() callback invoked
# by the client running recovery as part of writing the recovered # by the client running recovery as part of writing the recovered
# wal-index header. If a second client attempts to access the # wal-index header. If a second client attempts to access the