Add code for the atomic-write optimisation. Disabled by default. (CVS 4268)

FossilOrigin-Name: 581fadfe31757e3fb97b12f93c1e3c085e4b6009
This commit is contained in:
danielk1977 2007-08-22 11:22:03 +00:00
parent 1f12f9aa30
commit c7b6017c8a
7 changed files with 356 additions and 30 deletions

View File

@ -57,7 +57,7 @@ TCCX = $(TCC) $(OPTS) $(THREADSAFE) $(USLEEP) -I. -I$(TOP)/src
#
LIBOBJ+= alter.o analyze.o attach.o auth.o btree.o build.o \
callback.o complete.o date.o delete.o \
expr.o func.o hash.o insert.o loadext.o \
expr.o func.o hash.o insert.o journal.o loadext.o \
main.o malloc.o mem1.o mem2.o mutex.o \
opcodes.o os.o os_os2.o os_unix.o os_win.o \
pager.o parse.o pragma.o prepare.o printf.o random.o \
@ -103,6 +103,7 @@ SRC = \
$(TOP)/src/hash.c \
$(TOP)/src/hash.h \
$(TOP)/src/insert.c \
$(TOP)/src/journal.c \
$(TOP)/src/legacy.c \
$(TOP)/src/loadext.c \
$(TOP)/src/main.c \
@ -366,6 +367,9 @@ hash.o: $(TOP)/src/hash.c $(HDR)
insert.o: $(TOP)/src/insert.c $(HDR)
$(TCCX) -c $(TOP)/src/insert.c
journal.o: $(TOP)/src/journal.c $(HDR)
$(TCCX) -c $(TOP)/src/journal.c
legacy.o: $(TOP)/src/legacy.c $(HDR)
$(TCCX) -c $(TOP)/src/legacy.c

View File

@ -1,5 +1,5 @@
C Remove\sa\sdebugging\sbtree_breakpoint\sfrom\svtab1.test.\s(CVS\s4267)
D 2007-08-22T02:57:17
C Add\scode\sfor\sthe\satomic-write\soptimisation.\sDisabled\sby\sdefault.\s(CVS\s4268)
D 2007-08-22T11:22:04
F Makefile.in 0c0e53720f658c7a551046442dd7afba0b72bfbe
F Makefile.linux-gcc 65241babba6faf1152bf86574477baab19190499
F README 9c4e2d6706bdcc3efdd773ce752a8cdab4f90028
@ -63,7 +63,7 @@ F ext/icu/README.txt 3b130aa66e7a681136f6add198b076a2f90d1e33
F ext/icu/icu.c 61a345d8126686aa3487aa8d2d0f68abd655f7a4
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895
F ltmain.sh 56abb507100ed2d4261f6dd1653dec3cf4066387
F main.mk b1d97fe47a0633bc2ee5a09c7fdf7f4105156437
F main.mk 9e796bb4e04ca16d3d1506e6496a7468410dd441
F mkdll.sh 37fa8a7412e51b5ab2bc6d4276135f022a0feffb
F mkextu.sh 416f9b7089d80e5590a29692c9d9280a10dbad9f
F mkextw.sh 1a866b53637dab137191341cc875575a5ca110fb
@ -94,6 +94,7 @@ F src/func.c 36440cb02589fd4697cbbf0b351eeedc160d1f4b
F src/hash.c 2f322979071dd2bdba7503b5276d66f028744382
F src/hash.h 3ad3da76bfb954978d227bf495568b0e6da2c19e
F src/insert.c 633322aef1799f6604fa805e12488bc628570b0c
F src/journal.c 5ba2a1443b181741d3f2984d9d49e730073d74d1
F src/legacy.c a83519a8fbb488c3155fca577b010d590ec479e9
F src/limits.h 71ab25f17e35e0a9f3f6f234b8ed49cc56731d35
F src/loadext.c dd803303fd06ef0b13913faaa4a7fc7d8c8c4e77
@ -103,7 +104,7 @@ F src/md5.c c5fdfa5c2593eaee2e32a5ce6c6927c986eaf217
F src/mem1.c 30bf8be3846f92fdf88c490c5e5378512383bcbe
F src/mem2.c 482f0aaf14e8ef1db64cb8c5b9a9bfe708297c92
F src/mutex.c 9cf641f556a4119ef90ed41b82f2d5647f81686e
F src/os.c 89b93d67bc436c2d9df4b5d296f30a59144e55bb
F src/os.c d8f029317c95dcd2887b9f0f154281cdfbd303ad
F src/os.h 399c89cafa93b9ef35c3dc70f77644d10936b535
F src/os_common.h a5c446d3b93f09f369d13bf217de4bed3437dd1c
F src/os_os2.c 8769301bff502de642ad2634cedcb77d967ce199
@ -114,7 +115,7 @@ F src/os_unix.c 7aad42b1ee70d68034a4ac45fa822edccdc3d9e6
F src/os_unix.h 5768d56d28240d3fe4537fac08cc85e4fb52279e
F src/os_win.c 29c0e19c1072679a4c7818c49fab2f35d2ad7747
F src/os_win.h 41a946bea10f61c158ce8645e7646b29d44f122b
F src/pager.c d68e8c7b7e258c3e22c7872b602ff1b00d6cb41a
F src/pager.c 3568c1c557b2ff8faf054732abd32bc640116684
F src/pager.h 53087c6fb9db01aed17c7fd044662a27507e89b8
F src/parse.y 2d2ce439dc6184621fb0b86f4fc5aca7f391a590
F src/pragma.c 9b989506a1b7c8aecd6befb8235e2f57a4aba7e5
@ -126,7 +127,7 @@ F src/server.c 087b92a39d883e3fa113cae259d64e4c7438bc96
F src/shell.c ac29402b538515fa4697282387be9c1205e6e9eb
F src/sqlite.h.in 39f920631c49a8a79502d8b7908e608d7a0029bd
F src/sqlite3ext.h 647a6b8a8f76ff6c9611e4a071531d8e63ff2d6b
F src/sqliteInt.h 23eb6a5b1f10d5d3d34c3c7846b7c3b93acf1276
F src/sqliteInt.h 74eb61d596c0fb308a5ec07f060318f3a83474df
F src/sqliteLimit.h f14609c27636ebc217c9603ade26dbdd7d0f6afa
F src/table.c c725e47f6f3092b9a7b569fc58e408e2173ee008
F src/tclsqlite.c 92e06e076d613484aa2afc5ad830d9080de92347
@ -558,7 +559,7 @@ F www/tclsqlite.tcl 8be95ee6dba05eabcd27a9d91331c803f2ce2130
F www/vdbe.tcl 87a31ace769f20d3627a64fa1fade7fed47b90d0
F www/version3.tcl 890248cf7b70e60c383b0e84d77d5132b3ead42b
F www/whentouse.tcl fc46eae081251c3c181bd79c5faef8195d7991a5
P 783e07d561d1f5509de9475f3b9f38315f247002
R c5d3ce86bb4b05fc274208eefc6e2108
U drh
Z 9b8a52ba0f42f58ff0496debe78e0c8e
P 6cc8641ed2fbfc32d418ce153f2819052f230be2
R 60a77806fdc37cc90a1c02666401cadd
U danielk1977
Z ac8c850ecd87e6662e4865fbe6fa7e4b

View File

@ -1 +1 @@
6cc8641ed2fbfc32d418ce153f2819052f230be2
581fadfe31757e3fb97b12f93c1e3c085e4b6009

228
src/journal.c Normal file
View File

@ -0,0 +1,228 @@
/*
** 2007 August 22
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
*************************************************************************
**
** @(#) $Id: journal.c,v 1.1 2007/08/22 11:22:04 danielk1977 Exp $
*/
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
/*
** This file implements a special kind of sqlite3_file object used
** by SQLite to create journal files if the atomic-write optimization
** is enabled.
**
** The distinctive characteristic of this sqlite3_file is that the
** actual on disk file is created lazily. When the file is created,
** the caller specifies a buffer size for an in-memory buffer to
** be used to service read() and write() requests. The actual file
** on disk is not created or populated until either:
**
** 1) The in-memory representation grows too large for the allocated
** buffer, or
** 2) The xSync() method is called.
*/
#include "sqliteInt.h"
/*
** A JournalFile is handed to callers as a sqlite3_file: every method in
** this file casts its sqlite3_file* argument directly back to a
** JournalFile*. createFile() uses the memory immediately following this
** structure (&p[1]) as the handle for the real on-disk file.
*/
struct JournalFile {
sqlite3_io_methods *pMethod; /* Method table; set to &JournalFileMethods by sqlite3JournalOpen() */
int nBuf; /* Size in bytes of the allocation at zBuf */
char *zBuf; /* In-memory buffer serving reads/writes until the real file exists */
int iSize; /* Number of bytes of journal content currently held in zBuf */
int flags; /* Open flags forwarded to sqlite3OsOpen() when the real file is created */
sqlite3_vfs *pVfs; /* VFS used to open the real file */
sqlite3_file *pReal; /* Real on-disk file handle; NULL until createFile() opens it */
const char *zJournal; /* Journal file name; the caller's pointer is stored, not copied */
};
typedef struct JournalFile JournalFile;
/*
** If it does not already exist, create and populate the on-disk file
** for JournalFile p.
**
** The real file handle occupies the memory immediately following the
** JournalFile structure. p->pReal is published only after sqlite3OsOpen()
** has succeeded: every other method in this file treats a non-NULL
** p->pReal as "the on-disk file is open and usable", so setting it before
** a failed open would route later reads/writes to an unopened handle.
**
** Once the file is open, any content accumulated in the in-memory buffer
** is written to it starting at offset 0.
**
** Returns SQLITE_OK on success (or if the file already exists), otherwise
** the error code from sqlite3OsOpen() or sqlite3OsWrite().
*/
static int createFile(JournalFile *p){
  int rc = SQLITE_OK;
  if( !p->pReal ){
    sqlite3_file *pReal = (sqlite3_file *)&p[1];
    rc = sqlite3OsOpen(p->pVfs, p->zJournal, pReal, p->flags, 0);
    if( rc==SQLITE_OK ){
      p->pReal = pReal;
      if( p->iSize>0 ){
        assert(p->iSize<=p->nBuf);
        rc = sqlite3OsWrite(p->pReal, p->zBuf, p->iSize, 0);
      }
    }
  }
  return rc;
}
/*
** Close the journal file. The on-disk handle is closed if one was ever
** created, and the in-memory buffer is released. Always reports SQLITE_OK.
*/
static int jrnlClose(sqlite3_file *pJfd){
  JournalFile *pJrnl = (JournalFile *)pJfd;
  sqlite3_file *pDisk = pJrnl->pReal;

  if( pDisk!=0 ){
    sqlite3OsClose(pDisk);
  }
  sqlite3_free(pJrnl->zBuf);
  return SQLITE_OK;
}
/*
** Read iAmt bytes starting at offset iOfst into zBuf.
**
** If the on-disk file exists the request is forwarded to it. Otherwise
** the read is served from the in-memory buffer:
**
**   * If the whole range lies within the buffered content, it is copied
**     and SQLITE_OK is returned.
**   * If the range extends past the buffered content, the bytes that do
**     exist are copied, the remainder of zBuf is zero-filled and
**     SQLITE_IOERR_SHORT_READ is returned. This mirrors what a short read
**     from a real file yields, so callers see the same result whether or
**     not the journal has been spilled to disk.
*/
static int jrnlRead(
  sqlite3_file *pJfd,
  void *zBuf,
  int iAmt,
  sqlite_int64 iOfst
){
  int rc = SQLITE_OK;
  JournalFile *p = (JournalFile *)pJfd;
  if( p->pReal ){
    rc = sqlite3OsRead(p->pReal, zBuf, iAmt, iOfst);
  }else{
    int nCopy = 0;             /* Bytes actually available for this request */
    if( iOfst>=0 && iOfst<p->iSize ){
      sqlite_int64 nAvail = p->iSize - iOfst;
      nCopy = (nAvail<iAmt) ? (int)nAvail : iAmt;
    }
    if( nCopy>0 ){
      memcpy(zBuf, &p->zBuf[iOfst], nCopy);
    }
    if( nCopy<iAmt ){
      /* Short read: zero only the part of the buffer that was not filled. */
      memset(&((char *)zBuf)[nCopy], 0, iAmt-nCopy);
      rc = SQLITE_IOERR_SHORT_READ;
    }
  }
  return rc;
}
/*
** Write iAmt bytes from zBuf at offset iOfst.
**
** While the journal is still memory-resident, a write that would end
** beyond the in-memory buffer first forces the on-disk file into
** existence (createFile() also flushes the buffered content to it). Once
** a real file exists the write goes straight to it; otherwise the data is
** copied into the buffer and the recorded size is extended if needed.
*/
static int jrnlWrite(
  sqlite3_file *pJfd,
  const void *zBuf,
  int iAmt,
  sqlite_int64 iOfst
){
  JournalFile *pJrnl = (JournalFile *)pJfd;
  sqlite_int64 iEnd = iOfst + iAmt;     /* Offset one past the last byte */

  if( pJrnl->pReal==0 && iEnd>pJrnl->nBuf ){
    int rcSpill = createFile(pJrnl);
    if( rcSpill!=SQLITE_OK ) return rcSpill;
  }

  if( pJrnl->pReal ){
    return sqlite3OsWrite(pJrnl->pReal, zBuf, iAmt, iOfst);
  }

  memcpy(&pJrnl->zBuf[iOfst], zBuf, iAmt);
  if( pJrnl->iSize<iEnd ){
    pJrnl->iSize = (int)iEnd;
  }
  return SQLITE_OK;
}
/*
** Truncate the file to at most `size` bytes.
**
** If the on-disk file exists the request is forwarded to it. Otherwise
** only the recorded size of the in-memory content is reduced; a request
** for a size at or above the current size is a no-op. The in-memory size
** must never be increased here: doing so would claim content that was
** never written and could push iSize past the buffer size nBuf.
*/
static int jrnlTruncate(sqlite3_file *pJfd, sqlite_int64 size){
  int rc = SQLITE_OK;
  JournalFile *p = (JournalFile *)pJfd;
  if( p->pReal ){
    rc = sqlite3OsTruncate(p->pReal, size);
  }else if( size<p->iSize ){
    p->iSize = (int)(size<0 ? 0 : size);
  }
  return rc;
}
/*
** Sync the file. A sync always requires a real file, so the on-disk file
** is created (and populated from the in-memory buffer) first if it does
** not exist yet; the sync is then forwarded to it.
*/
static int jrnlSync(sqlite3_file *pJfd, int flags){
  JournalFile *pJrnl = (JournalFile *)pJfd;
  int rc = createFile(pJrnl);

  if( rc!=SQLITE_OK ){
    return rc;
  }
  return sqlite3OsSync(pJrnl->pReal, flags);
}
/*
** Report the size of the file in bytes through *pSize. While the journal
** is memory-resident this is the amount of buffered content; afterwards
** the real file is queried.
*/
static int jrnlFileSize(sqlite3_file *pJfd, sqlite_int64 *pSize){
  JournalFile *pJrnl = (JournalFile *)pJfd;

  if( pJrnl->pReal==0 ){
    *pSize = (sqlite_int64) pJrnl->iSize;
    return SQLITE_OK;
  }
  return sqlite3OsFileSize(pJrnl->pReal, pSize);
}
/*
** Table of methods for the JournalFile sqlite3_file object.
**
** Only close, read, write, truncate, sync and file-size are implemented.
** The locking, sector-size and device-characteristics slots are left
** NULL, so those methods must not be invoked on a JournalFile.
** The entries are positional and must stay in the order in which
** struct sqlite3_io_methods declares its members.
*/
static struct sqlite3_io_methods JournalFileMethods = {
1, /* iVersion */
jrnlClose, /* xClose */
jrnlRead, /* xRead */
jrnlWrite, /* xWrite */
jrnlTruncate, /* xTruncate */
jrnlSync, /* xSync */
jrnlFileSize, /* xFileSize */
0, /* xLock */
0, /* xUnlock */
0, /* xCheckReservedLock */
0, /* xBreakLock */
0, /* xLockState */
0, /* xSectorSize */
0 /* xDeviceCharacteristics */
};
/*
** Open a journal file.
**
** pJfd must point to at least sqlite3JournalSize(pVfs) bytes; that space
** is zeroed first. When nBuf is not positive no buffering is wanted and
** the file is opened directly through the VFS. Otherwise an nBuf-byte
** zeroed buffer is allocated and pJfd is set up as a JournalFile whose
** on-disk file is created lazily.
**
** Returns SQLITE_NOMEM if the buffer cannot be allocated, the result of
** sqlite3OsOpen() in the unbuffered case, and SQLITE_OK otherwise.
** zName is stored by reference, not copied.
*/
int sqlite3JournalOpen(
  sqlite3_vfs *pVfs,
  const char *zName,
  sqlite3_file *pJfd,
  int flags,
  int nBuf
){
  JournalFile *pJrnl = (JournalFile *)pJfd;

  memset(pJrnl, 0, sqlite3JournalSize(pVfs));

  if( nBuf<=0 ){
    return sqlite3OsOpen(pVfs, zName, pJfd, flags, 0);
  }

  pJrnl->zBuf = sqlite3MallocZero(nBuf);
  if( pJrnl->zBuf==0 ){
    return SQLITE_NOMEM;
  }

  pJrnl->pVfs = pVfs;
  pJrnl->zJournal = zName;
  pJrnl->flags = flags;
  pJrnl->nBuf = nBuf;
  pJrnl->pMethod = &JournalFileMethods;
  return SQLITE_OK;
}
/*
** Return the number of bytes needed to hold a JournalFile that uses vfs
** pVfs for its underlying on-disk file: the JournalFile header itself
** plus room for one of the VFS's own file handles directly after it.
*/
int sqlite3JournalSize(sqlite3_vfs *pVfs){
  int nByte = pVfs->szOsFile + sizeof(JournalFile);
  return nByte;
}
#endif

View File

@ -24,8 +24,12 @@
** C++ instead of plain old C.
*/
int sqlite3OsClose(sqlite3_file *pId){
if( !pId->pMethods ) return SQLITE_OK;
return pId->pMethods->xClose(pId);
int rc = SQLITE_OK;
if( pId->pMethods ){
rc = pId->pMethods->xClose(pId);
pId->pMethods = 0;
}
return rc;
}
int sqlite3OsRead(sqlite3_file *id, void *pBuf, int amt, i64 offset){
return id->pMethods->xRead(id, pBuf, amt, offset);

View File

@ -18,7 +18,7 @@
** file simultaneously, or one process from reading the database while
** another is writing.
**
** @(#) $Id: pager.c,v 1.365 2007/08/22 00:39:20 drh Exp $
** @(#) $Id: pager.c,v 1.366 2007/08/22 11:22:04 danielk1977 Exp $
*/
#ifndef SQLITE_OMIT_DISKIO
#include "sqliteInt.h"
@ -603,6 +603,42 @@ static int osUnlock(sqlite3_file *pFd, int eLock){
return sqlite3OsUnlock(pFd, eLock);
}
/*
** This function determines whether or not the atomic-write optimization
** can be used with this pager. The optimization can be used if:
**
** (a) the value returned by OsDeviceCharacteristics() indicates that
** a database page may be written atomically, and
** (b) the value returned by OsSectorSize() is less than or equal
** to the page size.
**
** If the optimization cannot be used, 0 is returned. If it can be used,
** then the value returned is the size of the journal file when it
** contains rollback data for exactly one page.
*/
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
static int jrnlBufferSize(Pager *pPager){
  int dc = 0;           /* Device characteristics */
  int nSector = 0;      /* Sector size */
  int nPage = 0;        /* Page size */
  sqlite3_file *fd = pPager->fd;

  /* The device can only be queried once the database file is open. */
  if( fd->pMethods ){
    dc = sqlite3OsDeviceCharacteristics(fd);
    nSector = sqlite3OsSectorSize(fd);
    nPage = pPager->pageSize;
  }

  /* Each SQLITE_IOCAP_ATOMICnnn flag equals (nnn>>8), so the flag that
  ** advertises atomic writes of one page is (nPage>>8). */
  assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
  assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));

  /* Usable when the file is not open yet, or when the device reports
  ** either general atomic writes or atomic writes of exactly this page
  ** size, and a sector is no larger than a page. */
  if( !fd->pMethods
   || ((dc&(SQLITE_IOCAP_ATOMIC|(nPage>>8)))!=0 && nSector<=nPage)
  ){
    return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
  }
  return 0;
}
#endif
/*
** This function should be called when an error occurs within the pager
** code. The first argument is a pointer to the pager structure, the
@ -1820,6 +1856,7 @@ int sqlite3PagerOpen(
int readOnly = 0;
int useJournal = (flags & PAGER_OMIT_JOURNAL)==0;
int noReadlock = (flags & PAGER_NO_READLOCK)!=0;
int journalFileSize = sqlite3JournalSize(pVfs);
/* The default return is a NULL pointer */
*ppPager = 0;
@ -1827,7 +1864,8 @@ int sqlite3PagerOpen(
/* Allocate memory for the pager structure */
pPager = sqlite3MallocZero(
sizeof(*pPager) + /* Pager structure */
pVfs->szOsFile * 3 + /* The db, journal and stmt journal files */
journalFileSize + /* The journal file structure */
pVfs->szOsFile * 2 + /* The db and stmt journal files */
pVfs->mxPathname * 3 + 30 /* zFilename, zDirectory, zJournal */
);
if( !pPager ){
@ -1835,9 +1873,9 @@ int sqlite3PagerOpen(
}
pPtr = (u8 *)&pPager[1];
pPager->fd = (sqlite3_file*)&pPtr[pVfs->szOsFile*0];
pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*3];
pPager->stfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*1];
pPager->jfd = (sqlite3_file*)&pPtr[pVfs->szOsFile*2];
pPager->zFilename = (char*)&pPtr[pVfs->szOsFile*2+journalFileSize];
pPager->zDirectory = &pPager->zFilename[pVfs->mxPathname];
pPager->zJournal = &pPager->zDirectory[pVfs->mxPathname];
pPager->pVfs = pVfs;
@ -1871,7 +1909,6 @@ int sqlite3PagerOpen(
** In this case we accept the default page size and delay actually
** opening the file until the first call to OsWrite().
*/
/* rc = sqlite3PagerOpentemp(pVfs, pPager->fd, pPager->zFilename); */
tempFile = 1;
pPager->state = PAGER_EXCLUSIVE;
}
@ -2998,7 +3035,7 @@ static int pagerSharedLock(Pager *pPager){
int fout = 0;
int flags = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
assert( !pPager->tempFile );
rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, &fout);
rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags,&fout);
assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
if( fout&SQLITE_OPEN_READONLY ){
rc = SQLITE_BUSY;
@ -3460,7 +3497,13 @@ static int pager_open_journal(Pager *pPager){
}else{
flags |= (SQLITE_OPEN_MAIN_JOURNAL);
}
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
rc = sqlite3JournalOpen(
pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
);
#else
rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
#endif
assert( rc!=SQLITE_OK || pPager->jfd->pMethods );
pPager->journalOff = 0;
pPager->setMaster = 0;
@ -4010,27 +4053,36 @@ void sqlite3PagerDontRollback(DbPage *pPg){
** This routine is called to increment the database file change-counter,
** stored at byte 24 of the pager file.
*/
static int pager_incr_changecounter(Pager *pPager){
static int pager_incr_changecounter(Pager *pPager, int isDirect){
PgHdr *pPgHdr;
u32 change_counter;
int rc;
int rc = SQLITE_OK;
if( !pPager->changeCountDone ){
/* Open page 1 of the file for writing. */
rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
if( rc!=SQLITE_OK ) return rc;
rc = sqlite3PagerWrite(pPgHdr);
if( rc!=SQLITE_OK ) return rc;
if( !isDirect ){
rc = sqlite3PagerWrite(pPgHdr);
if( rc!=SQLITE_OK ) return rc;
}
/* Increment the value just read and write it back to byte 24. */
change_counter = sqlite3Get4byte((u8*)pPager->dbFileVers);
change_counter++;
put32bits(((char*)PGHDR_TO_DATA(pPgHdr))+24, change_counter);
if( isDirect && pPager->fd->pMethods ){
const void *zBuf = PGHDR_TO_DATA(pPgHdr);
rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
}
/* Release the page reference. */
sqlite3PagerUnref(pPgHdr);
pPager->changeCountDone = 1;
}
return SQLITE_OK;
return rc;
}
/*
@ -4064,6 +4116,36 @@ int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
PgHdr *pPg;
assert( pPager->journalOpen );
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
/* The atomic-write optimization can be used if all of the
** following are true:
**
** + The file-system supports the atomic-write property for
** blocks of size page-size, and
** + This commit is not part of a multi-file transaction, and
** + Exactly one page has been modified and stored in the journal file.
**
** If the optimization can be used, then the journal file will never
** be created for this transaction.
*/
if( !zMaster && pPager->journalOff==jrnlBufferSize(pPager) && nTrunc==0
&& (0==pPager->pDirty || 0==pPager->pDirty->pDirty)
){
/* Update the nRec field in the journal file. */
int offset = pPager->journalHdr + sizeof(aJournalMagic);
assert(pPager->nRec==1);
rc = write32bits(pPager->jfd, offset, pPager->nRec);
/* Update the db file change counter. The following call will modify
** the in-memory representation of page 1 to include the updated
** change counter and then write page 1 directly to the database
** file. Because of the atomic-write property of the host file-system,
** this is safe.
*/
rc = pager_incr_changecounter(pPager, 1);
}else
#endif
/* If a master journal file name has already been written to the
** journal file, then no sync is required. This happens when it is
** written, then the process fails to upgrade from a RESERVED to an
@ -4071,7 +4153,7 @@ int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
** transaction the m-j name will have already been written.
*/
if( !pPager->setMaster ){
rc = pager_incr_changecounter(pPager);
rc = pager_incr_changecounter(pPager, 0);
if( rc!=SQLITE_OK ) goto sync_exit;
#ifndef SQLITE_OMIT_AUTOVACUUM
if( nTrunc!=0 ){
@ -4095,8 +4177,8 @@ int sqlite3PagerCommitPhaseOne(Pager *pPager, const char *zMaster, Pgno nTrunc){
rc = writeMasterJournal(pPager, zMaster);
if( rc!=SQLITE_OK ) goto sync_exit;
rc = syncJournal(pPager);
if( rc!=SQLITE_OK ) goto sync_exit;
}
if( rc!=SQLITE_OK ) goto sync_exit;
#ifndef SQLITE_OMIT_AUTOVACUUM
if( nTrunc!=0 ){

View File

@ -11,7 +11,7 @@
*************************************************************************
** Internal interface definitions for SQLite.
**
** @(#) $Id: sqliteInt.h,v 1.595 2007/08/21 19:33:56 drh Exp $
** @(#) $Id: sqliteInt.h,v 1.596 2007/08/22 11:22:04 danielk1977 Exp $
*/
#ifndef _SQLITEINT_H_
#define _SQLITEINT_H_
@ -1862,6 +1862,13 @@ int sqlite3Reprepare(Vdbe*);
void sqlite3ExprListCheckLength(Parse*, ExprList*, int, const char*);
CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
#ifdef SQLITE_ENABLE_ATOMIC_WRITE
int sqlite3JournalOpen(sqlite3_vfs *, const char *, sqlite3_file *, int, int);
int sqlite3JournalSize(sqlite3_vfs *);
#else
#define sqlite3JournalSize(pVfs) ((pVfs)->szOsFile)
#endif
#if SQLITE_MAX_EXPR_DEPTH>0
void sqlite3ExprSetHeight(Expr *);
int sqlite3SelectExprHeight(Select *);