From d2f5ee20f051e768a4883ce9c01b7725da1fb6a7 Mon Sep 17 00:00:00 2001 From: dan Date: Mon, 20 Oct 2014 16:24:23 +0000 Subject: [PATCH] Have the ota extension perform an incremental checkpoint after generating the wal file. FossilOrigin-Name: 0bf1301aacb3b717b4cc020fbda9fab0bae331c3 --- ext/ota/ota1.test | 6 +- ext/ota/sqlite3ota.c | 535 ++++++++++++++++++++++++++++++------------- manifest | 30 +-- manifest.uuid | 2 +- src/main.c | 18 ++ src/pager.c | 20 +- src/pager.h | 1 + src/pragma.c | 2 +- src/sqlite.h.in | 34 +++ src/wal.c | 416 +++++++++++++++++++++++---------- src/wal.h | 9 + test/wal.test | 1 + 12 files changed, 770 insertions(+), 304 deletions(-) diff --git a/ext/ota/ota1.test b/ext/ota/ota1.test index c55ec6ee3b..4ac469573f 100644 --- a/ext/ota/ota1.test +++ b/ext/ota/ota1.test @@ -124,12 +124,12 @@ foreach {tn2 cmd} {1 run_ota 2 step_ota} { } { reset_db execsql $schema - + do_test 1.$tn2.$tn.1 { create_ota1 ota.db $cmd test.db ota.db } {SQLITE_DONE} - + do_execsql_test 1.$tn2.$tn.2 { SELECT * FROM t1 ORDER BY a ASC; } { @@ -137,7 +137,7 @@ foreach {tn2 cmd} {1 run_ota 2 step_ota} { 2 two three 3 {} 8.2 } - + do_execsql_test 1.$tn2.$tn.3 { PRAGMA integrity_check } ok } } diff --git a/ext/ota/sqlite3ota.c b/ext/ota/sqlite3ota.c index 18a71910e5..1d4ea1f859 100644 --- a/ext/ota/sqlite3ota.c +++ b/ext/ota/sqlite3ota.c @@ -22,21 +22,54 @@ /* ** The ota_state table is used to save the state of a partially applied -** update so that it can be resumed later. The table contains at most a -** single row: +** update so that it can be resumed later. The table consists of integer +** keys mapped to values as follows: ** -** "tbl" -> Table currently being written (target database names). +** OTA_STATE_STAGE: +** May be set to integer values 1, 2 or 3. As follows: +** 0: Nothing at all has been done. +** 1: the *-ota file is currently under construction. +** 2: the *-ota file has been constructed, but not yet moved +** to the *-wal path. 
+** 3: the checkpoint is underway. ** -** "idx" -> Index currently being written (target database names). -** Or, if the main table is being written, a NULL value. +** OTA_STATE_TBL: +** Only valid if STAGE==1. The target database name of the table +** currently being written. ** -** "row" -> Number of rows for this object already processed +** OTA_STATE_IDX: +** Only valid if STAGE==1. The target database name of the index +** currently being written, or NULL if the main table is currently being +** updated. +** +** OTA_STATE_ROW: +** Only valid if STAGE==1. Number of rows already processed for the current +** table/index. +** +** OTA_STATE_PROGRESS: +** Total number of sqlite3ota_step() calls made so far as part of this +** ota update. +** +** OTA_STATE_CKPT: +** Valid if STAGE==3. The blob to pass to sqlite3_ckpt_open() to resume +** the incremental checkpoint. ** -** "progress" -> total number of key/value b-tree operations performed -** so far as part of this ota update. */ +#define OTA_STATE_STAGE 1 +#define OTA_STATE_TBL 2 +#define OTA_STATE_IDX 3 +#define OTA_STATE_ROW 4 +#define OTA_STATE_PROGRESS 5 +#define OTA_STATE_CKPT 6 + +#define OTA_STAGE_OAL 1 +#define OTA_STAGE_COPY 2 +#define OTA_STAGE_CKPT 3 +#define OTA_STAGE_DONE 4 + + #define OTA_CREATE_STATE "CREATE TABLE IF NOT EXISTS ota.ota_state" \ - "(tbl, idx, row, progress)" + "(k INTEGER PRIMARY KEY, v)" typedef struct OtaState OtaState; typedef struct OtaObjIter OtaObjIter; @@ -45,8 +78,11 @@ typedef struct OtaObjIter OtaObjIter; ** A structure to store values read from the ota_state table in memory. */ struct OtaState { + int eStage; char *zTbl; char *zIdx; + unsigned char *pCkptState; + int nCkptState; int nRow; sqlite3_int64 nProgress; }; @@ -88,13 +124,16 @@ struct OtaObjIter { ** OTA handle. 
*/ struct sqlite3ota { + int eStage; /* Value of OTA_STATE_STAGE field */ sqlite3 *db; /* "main" -> target db, "ota" -> ota db */ char *zTarget; /* Path to target db */ + char *zOta; /* Path to ota db */ int rc; /* Value returned by last ota_step() call */ char *zErrmsg; /* Error message if rc!=SQLITE_OK */ int nStep; /* Rows processed for current object */ int nProgress; /* Rows processed for all objects */ - OtaObjIter objiter; + OtaObjIter objiter; /* Iterator for skipping through tbl/idx */ + sqlite3_ckpt *pCkpt; /* Incr-checkpoint handle */ }; /* @@ -742,6 +781,84 @@ static int otaGetUpdateStmt( return p->rc; } +static void otaOpenDatabase(sqlite3ota *p){ + assert( p->rc==SQLITE_OK ); + sqlite3_close(p->db); + p->db = 0; + + p->rc = sqlite3_open(p->zTarget, &p->db); + if( p->rc ){ + p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(p->db)); + } + otaMPrintfExec(p, "ATTACH %Q AS ota", p->zOta); +} + +/* +** This routine is a copy of the sqlite3FileSuffix3() routine from the core. +** It is a no-op unless SQLITE_ENABLE_8_3_NAMES is defined. +** +** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database +** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and +** if filename in z[] has a suffix (a.k.a. "extension") that is longer than +** three characters, then shorten the suffix on z[] to be the last three +** characters of the original suffix. +** +** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always +** do the suffix shortening regardless of URI parameter. +** +** Examples: +** +** test.db-journal => test.nal +** test.db-wal => test.wal +** test.db-shm => test.shm +** test.db-mj7f3319fa => test.9fa +*/ +static void otaFileSuffix3(const char *zBase, char *z){ +#ifdef SQLITE_ENABLE_8_3_NAMES +#if SQLITE_ENABLE_8_3_NAMES<2 + if( sqlite3_uri_boolean(zBase, "8_3_names", 0) ) +#endif + { + int i, sz; + sz = sqlite3Strlen30(z); + for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} + if( z[i]=='.' 
&& ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4); + } +#endif +} + +/* +** Move the "*-oal" file corresponding to the target database to the +** "*-wal" location. If an error occurs, leave an error code and error +** message in the ota handle. +*/ +static void otaMoveOalFile(sqlite3ota *p){ + const char *zBase = sqlite3_db_filename(p->db, "main"); + + char *zWal = sqlite3_mprintf("%s-wal", zBase); + char *zOal = sqlite3_mprintf("%s-oal", zBase); + + assert( p->rc==SQLITE_OK && p->zErrmsg==0 ); + if( zWal==0 || zOal==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + /* Move the *-oal file to *-wal. At this point connection p->db is + ** holding a SHARED lock on the target database file (because it is + ** in WAL mode). So no other connection may be writing the db. */ + otaFileSuffix3(zBase, zWal); + otaFileSuffix3(zBase, zOal); + rename(zOal, zWal); + + /* Re-open the databases. */ + otaObjIterFinalize(&p->objiter); + otaOpenDatabase(p); + p->eStage = OTA_STAGE_CKPT; + } + + sqlite3_free(zWal); + sqlite3_free(zOal); +} + /* ** The SELECT statement iterating through the keys for the current object ** (p->objiter.pSelect) currently points to a valid row. This function @@ -863,51 +980,170 @@ static int otaStep(sqlite3ota *p){ return p->rc; } +/* +** Increment the schema cookie of the main database opened by p->db. +*/ +static void otaIncrSchemaCookie(sqlite3ota *p){ + int iCookie = 1000000; + sqlite3_stmt *pStmt; + + assert( p->rc==SQLITE_OK && p->zErrmsg==0 ); + p->rc = prepareAndCollectError(p->db, &pStmt, &p->zErrmsg, + "PRAGMA schema_version" + ); + if( p->rc==SQLITE_OK ){ + if( SQLITE_ROW==sqlite3_step(pStmt) ){ + iCookie = sqlite3_column_int(pStmt, 0); + } + p->rc = sqlite3_finalize(pStmt); + } + if( p->rc==SQLITE_OK ){ + otaMPrintfExec(p, "PRAGMA schema_version = %d", iCookie+1); + } +} + /* ** Step the OTA object. 
*/ int sqlite3ota_step(sqlite3ota *p){ if( p ){ - OtaObjIter *pIter = &p->objiter; - while( p && p->rc==SQLITE_OK && pIter->zTbl ){ + switch( p->eStage ){ + case OTA_STAGE_OAL: { + OtaObjIter *pIter = &p->objiter; + while( p && p->rc==SQLITE_OK && pIter->zTbl ){ - if( pIter->bCleanup ){ - /* Clean up the ota_tmp_xxx table for the previous table. It - ** cannot be dropped as there are currently active SQL statements. - ** But the contents can be deleted. */ - otaMPrintfExec(p, "DELETE FROM ota.'ota_tmp_%q'", pIter->zTbl); - }else{ - otaObjIterPrepareAll(p, pIter, 0); - - /* Advance to the next row to process. */ - if( p->rc==SQLITE_OK ){ - int rc = sqlite3_step(pIter->pSelect); - if( rc==SQLITE_ROW ){ - p->nStep++; - p->nProgress++; - return otaStep(p); + if( pIter->bCleanup ){ + /* Clean up the ota_tmp_xxx table for the previous table. It + ** cannot be dropped as there are currently active SQL statements. + ** But the contents can be deleted. */ + otaMPrintfExec(p, "DELETE FROM ota.'ota_tmp_%q'", pIter->zTbl); + }else{ + otaObjIterPrepareAll(p, pIter, 0); + + /* Advance to the next row to process. 
*/ + if( p->rc==SQLITE_OK ){ + int rc = sqlite3_step(pIter->pSelect); + if( rc==SQLITE_ROW ){ + p->nProgress++; + p->nStep++; + return otaStep(p); + } + p->rc = sqlite3_reset(pIter->pSelect); + p->nStep = 0; + } } - p->rc = sqlite3_reset(pIter->pSelect); - p->nStep = 0; + + otaObjIterNext(p, pIter); } + + if( p->rc==SQLITE_OK && pIter->zTbl==0 ){ + p->nProgress++; + otaIncrSchemaCookie(p); + if( p->rc==SQLITE_OK ){ + p->rc = sqlite3_exec(p->db, "COMMIT", 0, 0, &p->zErrmsg); + } + if( p->rc==SQLITE_OK ){ + otaMoveOalFile(p); + } + } + break; } - otaObjIterNext(p, pIter); - } + case OTA_STAGE_CKPT: { - if( p->rc==SQLITE_OK && pIter->zTbl==0 ){ - p->rc = SQLITE_DONE; + if( p->rc==SQLITE_OK && p->pCkpt==0 ){ + p->rc = sqlite3_ckpt_open(p->db, 0, 0, &p->pCkpt); + } + if( p->rc==SQLITE_OK ){ + if( SQLITE_OK!=sqlite3_ckpt_step(p->pCkpt) ){ + p->rc = sqlite3_ckpt_close(p->pCkpt, 0, 0); + p->pCkpt = 0; + if( p->rc==SQLITE_OK ){ + p->eStage = OTA_STAGE_DONE; + p->rc = SQLITE_DONE; + } + } + p->nProgress++; + } + + break; + } + + default: + break; } } return p->rc; } static void otaSaveTransactionState(sqlite3ota *p){ - otaMPrintfExec(p, - "INSERT OR REPLACE INTO ota.ota_state(rowid, tbl, idx, row, progress)" - "VALUES(1, %Q, %Q, %d, %lld)", - p->objiter.zTbl, p->objiter.zIdx, p->nStep, p->nProgress + sqlite3_stmt *pInsert; + int rc; + + assert( (p->rc==SQLITE_OK || p->rc==SQLITE_DONE) && p->zErrmsg==0 ); + rc = prepareFreeAndCollectError(p->db, &pInsert, &p->zErrmsg, + sqlite3_mprintf( + "INSERT OR REPLACE INTO ota.ota_state(k, v) VALUES " + "(%d, %d), " + "(%d, %Q), " + "(%d, %Q), " + "(%d, %d), " + "(%d, %lld), " + "(%d, ?) 
", + OTA_STATE_STAGE, p->eStage, + OTA_STATE_TBL, p->objiter.zTbl, + OTA_STATE_IDX, p->objiter.zIdx, + OTA_STATE_ROW, p->nStep, + OTA_STATE_PROGRESS, p->nProgress, + OTA_STATE_CKPT + ) ); + assert( pInsert==0 || rc==SQLITE_OK ); + if( rc==SQLITE_OK ){ + if( p->pCkpt ){ + unsigned char *pCkptState = 0; + int nCkptState = 0; + rc = sqlite3_ckpt_close(p->pCkpt, &pCkptState, &nCkptState); + p->pCkpt = 0; + sqlite3_bind_blob(pInsert, 1, pCkptState, nCkptState, SQLITE_TRANSIENT); + sqlite3_free(pCkptState); + } + } + if( rc==SQLITE_OK ){ + sqlite3_step(pInsert); + rc = sqlite3_finalize(pInsert); + }else{ + sqlite3_finalize(pInsert); + } + + if( rc!=SQLITE_OK ){ + p->rc = rc; + } +} + +static char *otaStrndup(char *zStr, int nStr, int *pRc){ + char *zRet = 0; + assert( *pRc==SQLITE_OK ); + + if( zStr ){ + int nCopy = nStr; + if( nCopy<0 ) nCopy = strlen(zStr) + 1; + zRet = (char*)sqlite3_malloc(nCopy); + if( zRet ){ + memcpy(zRet, zStr, nCopy); + }else{ + *pRc = SQLITE_NOMEM; + } + } + + return zRet; +} + +static void otaFreeState(OtaState *p){ + sqlite3_free(p->zTbl); + sqlite3_free(p->zIdx); + sqlite3_free(p->pCkptState); + sqlite3_free(p); } /* @@ -920,47 +1156,63 @@ static void otaSaveTransactionState(sqlite3ota *p){ ** and return NULL. */ static OtaState *otaLoadState(sqlite3ota *p){ - const char *zSelect = "SELECT tbl, idx, row, progress FROM ota.ota_state"; + const char *zSelect = "SELECT k, v FROM ota.ota_state"; OtaState *pRet = 0; sqlite3_stmt *pStmt; int rc; + int rc2; assert( p->rc==SQLITE_OK ); - rc = prepareAndCollectError(p->db, &pStmt, &p->zErrmsg, zSelect); - if( rc==SQLITE_OK ){ - if( sqlite3_step(pStmt)==SQLITE_ROW ){ - const char *zIdx = (const char*)sqlite3_column_text(pStmt, 1); - const char *zTbl = (const char*)sqlite3_column_text(pStmt, 0); - int nIdx = zIdx ? 
(strlen(zIdx) + 1) : 0; - int nTbl = strlen(zTbl) + 1; - int nByte = sizeof(OtaState) + nTbl + nIdx; + pRet = (OtaState*)sqlite3_malloc(sizeof(OtaState)); + if( pRet==0 ){ + rc = SQLITE_NOMEM; + }else{ + memset(pRet, 0, sizeof(OtaState)); + rc = prepareAndCollectError(p->db, &pStmt, &p->zErrmsg, zSelect); + } - pRet = (OtaState*)sqlite3_malloc(nByte); - if( pRet ){ - pRet->zTbl = (char*)&pRet[1]; - memcpy(pRet->zTbl, sqlite3_column_text(pStmt, 0), nTbl); - if( zIdx ){ - pRet->zIdx = &pRet->zTbl[nTbl]; - memcpy(pRet->zIdx, zIdx, nIdx); - }else{ - pRet->zIdx = 0; + while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ + switch( sqlite3_column_int(pStmt, 0) ){ + case OTA_STATE_STAGE: + pRet->eStage = sqlite3_column_int(pStmt, 1); + if( pRet->eStage!=OTA_STAGE_OAL + && pRet->eStage!=OTA_STAGE_COPY + && pRet->eStage!=OTA_STAGE_CKPT + ){ + p->rc = SQLITE_CORRUPT; } - pRet->nRow = sqlite3_column_int(pStmt, 2); - pRet->nProgress = sqlite3_column_int64(pStmt, 3); - } - }else{ - pRet = (OtaState*)sqlite3_malloc(sizeof(OtaState)); - if( pRet ){ - memset(pRet, 0, sizeof(*pRet)); - } - } - rc = sqlite3_finalize(pStmt); - if( rc==SQLITE_OK && pRet==0 ) rc = SQLITE_NOMEM; - if( rc!=SQLITE_OK ){ - sqlite3_free(pRet); - pRet = 0; + break; + + case OTA_STATE_TBL: + pRet->zTbl = otaStrndup((char*)sqlite3_column_text(pStmt, 1), -1, &rc); + break; + + case OTA_STATE_IDX: + pRet->zIdx = otaStrndup((char*)sqlite3_column_text(pStmt, 1), -1, &rc); + break; + + case OTA_STATE_ROW: + pRet->nRow = sqlite3_column_int(pStmt, 1); + break; + + case OTA_STATE_PROGRESS: + pRet->nProgress = sqlite3_column_int64(pStmt, 1); + break; + + case OTA_STATE_CKPT: + pRet->nCkptState = sqlite3_column_bytes(pStmt, 1); + pRet->pCkptState = (unsigned char*)otaStrndup( + (char*)sqlite3_column_blob(pStmt, 1), pRet->nCkptState, &rc + ); + break; + + default: + rc = SQLITE_CORRUPT; + break; } } + rc2 = sqlite3_finalize(pStmt); + if( rc==SQLITE_OK ) rc = rc2; p->rc = rc; return pRet; @@ -999,62 +1251,6 @@ static 
void otaLoadTransactionState(sqlite3ota *p, OtaState *pState){ } } -/* -** This routine is a copy of the sqlite3FileSuffix3() routine from the core. -** It is a no-op unless SQLITE_ENABLE_8_3_NAMES is defined. -** -** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database -** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and -** if filename in z[] has a suffix (a.k.a. "extension") that is longer than -** three characters, then shorten the suffix on z[] to be the last three -** characters of the original suffix. -** -** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always -** do the suffix shortening regardless of URI parameter. -** -** Examples: -** -** test.db-journal => test.nal -** test.db-wal => test.wal -** test.db-shm => test.shm -** test.db-mj7f3319fa => test.9fa -*/ -static void otaFileSuffix3(const char *zBase, char *z){ -#ifdef SQLITE_ENABLE_8_3_NAMES -#if SQLITE_ENABLE_8_3_NAMES<2 - if( sqlite3_uri_boolean(zBase, "8_3_names", 0) ) -#endif - { - int i, sz; - sz = sqlite3Strlen30(z); - for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){} - if( z[i]=='.' && ALWAYS(sz>i+4) ) memmove(&z[i+1], &z[sz-3], 4); - } -#endif -} - -/* -** Move the "*-oal" file corresponding to the target database to the -** "*-wal" location. If an error occurs, leave an error code and error -** message in the ota handle. -*/ -static void otaMoveOalFile(const char *zBase, sqlite3ota *p){ - char *zWal = sqlite3_mprintf("%s-wal", p->zTarget); - char *zOal = sqlite3_mprintf("%s-oal", p->zTarget); - - assert( p->rc==SQLITE_DONE && p->zErrmsg==0 ); - if( zWal==0 || zOal==0 ){ - p->rc = SQLITE_NOMEM; - }else{ - otaFileSuffix3(zBase, zWal); - otaFileSuffix3(zBase, zOal); - rename(zOal, zWal); - } - - sqlite3_free(zWal); - sqlite3_free(zOal); -} - /* ** If there is a "*-oal" file in the file-system corresponding to the ** target database in the file-system, delete it. 
If an error occurs, @@ -1073,8 +1269,9 @@ static void otaDeleteOalFile(sqlite3ota *p){ sqlite3ota *sqlite3ota_open(const char *zTarget, const char *zOta){ sqlite3ota *p; int nTarget = strlen(zTarget); + int nOta = strlen(zOta); - p = (sqlite3ota*)sqlite3_malloc(sizeof(sqlite3ota)+nTarget+1); + p = (sqlite3ota*)sqlite3_malloc(sizeof(sqlite3ota)+nTarget+1+nOta+1); if( p ){ OtaState *pState = 0; @@ -1082,11 +1279,9 @@ sqlite3ota *sqlite3ota_open(const char *zTarget, const char *zOta){ memset(p, 0, sizeof(sqlite3ota)); p->zTarget = (char*)&p[1]; memcpy(p->zTarget, zTarget, nTarget+1); - p->rc = sqlite3_open(zTarget, &p->db); - if( p->rc ){ - p->zErrmsg = sqlite3_mprintf("%s", sqlite3_errmsg(p->db)); - } - otaMPrintfExec(p, "ATTACH %Q AS ota", zOta); + p->zOta = &p->zTarget[nTarget+1]; + memcpy(p->zOta, zOta, nOta+1); + otaOpenDatabase(p); /* If it has not already been created, create the ota_state table */ if( p->rc==SQLITE_OK ){ @@ -1095,32 +1290,45 @@ sqlite3ota *sqlite3ota_open(const char *zTarget, const char *zOta){ if( p->rc==SQLITE_OK ){ pState = otaLoadState(p); - if( pState && pState->zTbl==0 ){ - otaDeleteOalFile(p); + assert( pState || p->rc!=SQLITE_OK ); + if( pState ){ + if( pState->eStage==0 ){ + otaDeleteOalFile(p); + p->eStage = 1; + }else{ + p->eStage = pState->eStage; + } + p->nProgress = pState->nProgress; } } + assert( p->rc!=SQLITE_OK || p->eStage!=0 ); - if( p->rc==SQLITE_OK ){ - const char *zScript = - "PRAGMA journal_mode=off;" - "PRAGMA pager_ota_mode=1;" - "PRAGMA ota_mode=1;" - "BEGIN IMMEDIATE;" - ; - p->rc = sqlite3_exec(p->db, zScript, 0, 0, &p->zErrmsg); + if( p->eStage==OTA_STAGE_OAL ){ + if( p->rc==SQLITE_OK ){ + const char *zScript = + "PRAGMA journal_mode=off;" + "PRAGMA pager_ota_mode=1;" + "PRAGMA ota_mode=1;" + "BEGIN IMMEDIATE;" + ; + p->rc = sqlite3_exec(p->db, zScript, 0, 0, &p->zErrmsg); + } + + /* Point the object iterator at the first object */ + if( p->rc==SQLITE_OK ){ + p->rc = otaObjIterFirst(p, &p->objiter); + } + + if( 
p->rc==SQLITE_OK ){ + otaLoadTransactionState(p, pState); + } + }else if( p->rc==SQLITE_OK && p->eStage==OTA_STAGE_CKPT ){ + p->rc = sqlite3_ckpt_open( + p->db, pState->pCkptState, pState->nCkptState, &p->pCkpt + ); } - /* Point the object iterator at the first object */ - if( p->rc==SQLITE_OK ){ - p->rc = otaObjIterFirst(p, &p->objiter); - } - - if( p->rc==SQLITE_OK ){ - p->nProgress = pState->nProgress; - otaLoadTransactionState(p, pState); - } - - sqlite3_free(pState); + otaFreeState(pState); } return p; @@ -1132,35 +1340,32 @@ sqlite3ota *sqlite3ota_open(const char *zTarget, const char *zOta){ int sqlite3ota_close(sqlite3ota *p, char **pzErrmsg){ int rc; if( p ){ - const char *zBase = sqlite3_db_filename(p->db, "main"); /* If the update has not been fully applied, save the state in ** the ota db. If successful, this call also commits the open ** transaction on the ota db. */ assert( p->rc!=SQLITE_ROW ); - if( p->rc==SQLITE_OK ){ + if( p->rc==SQLITE_OK || p->rc==SQLITE_DONE ){ assert( p->zErrmsg==0 ); otaSaveTransactionState(p); } - /* Close all open statement handles. */ + /* Close any open statement handles. */ otaObjIterFinalize(&p->objiter); /* Commit the transaction to the *-oal file. */ - if( p->rc==SQLITE_OK || p->rc==SQLITE_DONE ){ - rc = sqlite3_exec(p->db, "COMMIT", 0, 0, &p->zErrmsg); - if( rc!=SQLITE_OK ) p->rc = rc; + if( p->rc==SQLITE_OK && p->eStage==OTA_STAGE_OAL ){ + p->rc = sqlite3_exec(p->db, "COMMIT", 0, 0, &p->zErrmsg); } - /* Close the open database handles */ + if( p->rc==SQLITE_OK && p->eStage==OTA_STAGE_CKPT ){ + p->rc = sqlite3_exec(p->db, "PRAGMA pager_ota_mode=2", 0, 0, &p->zErrmsg); + } + + /* Close the open database handle */ + if( p->pCkpt ) sqlite3_ckpt_close(p->pCkpt, 0, 0); sqlite3_close(p->db); - /* If the OTA has been completely applied and no error occurred, move - ** the *-oal file to *-wal. 
*/ - if( p->rc==SQLITE_DONE ){ - otaMoveOalFile(zBase, p); - } - rc = p->rc; *pzErrmsg = p->zErrmsg; sqlite3_free(p); diff --git a/manifest b/manifest index 7cd0869d20..373d1c020a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sfurther\stests\sto\sota5.test.\sAdd\s"ota.test",\sfor\srunning\sall\sota\stests. -D 2014-09-19T18:08:39.681 +C Have\sthe\sota\sextension\sperform\san\sincremental\scheckpoint\safter\sgenerating\sthe\swal\sfile. +D 2014-10-20T16:24:23.616 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in cf57f673d77606ab0f2d9627ca52a9ba1464146a F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -123,12 +123,12 @@ F ext/misc/vtshim.c babb0dc2bf116029e3e7c9a618b8a1377045303e F ext/misc/wholenumber.c 784b12543d60702ebdd47da936e278aa03076212 F ext/ota/README.txt cb11e39bfeba952ac8896dab860ada9d54731fb8 F ext/ota/ota.c c11a85af71dccc45976622fe7a51169a481caa91 -F ext/ota/ota1.test 86ff92699aad11e3c80a604832244a043c912a94 +F ext/ota/ota1.test 0c8e5ef3d059bebe7872477381424836326a8e0a F ext/ota/ota2.test 4568c2671d19dbde789fb9091d727a2e94880128 F ext/ota/ota3.test 215dd4a8e238567e0f890a5139b6fdf5494ef311 F ext/ota/ota4.test 60f897f329a6782ef2f24862640acf3c52e48077 F ext/ota/ota5.test ad0799daf8923ddebffe75ae8c5504ca90b7fadb -F ext/ota/sqlite3ota.c b22002105b3b7f3baf63bda2b4e6a00c4973418c +F ext/ota/sqlite3ota.c 1b4e4cdb05e67982865466f4b0aaea8d3648269c F ext/ota/sqlite3ota.h 7b20abe9247d292429d00f0a5c237ff6e0dc0196 F ext/rtree/README 6315c0d73ebf0ec40dedb5aa0e942bc8b54e3761 F ext/rtree/rtree.c 57bec53e1a677ab74217fe1f20a58c3a47261d6b @@ -202,7 +202,7 @@ F src/journal.c b4124532212b6952f42eb2c12fa3c25701d8ba8d F src/legacy.c ba1863ea58c4c840335a84ec276fc2b25e22bc4e F src/lempar.c 7274c97d24bb46631e504332ccd3bd1b37841770 F src/loadext.c de741e66e5ddc1598d904d7289239696e40ed994 -F src/main.c d15621461fb0c52675eba2b650492ed1beef69ab +F src/main.c 57cdf37a1bf596829831530fe02f2649d4b721e3 F src/malloc.c 
4c1d511157defd7b1d023062cf05a1dc17b8f79b F src/mem0.c 6a55ebe57c46ca1a7d98da93aaa07f99f1059645 F src/mem1.c faf615aafd8be74a71494dfa027c113ea5c6615f @@ -223,13 +223,13 @@ F src/os_setup.h c9d4553b5aaa6f73391448b265b89bed0b890faa F src/os_unix.c addd023b26c623fec4dedc110fc4370a65b4768c F src/os_win.c 0a4042ef35f322e86fa01f6c8884c5e645b911e7 F src/os_win.h 09e751b20bbc107ffbd46e13555dc73576d88e21 -F src/pager.c 348c9da924d2e0064e6a2646bba157e02dcc26cc -F src/pager.h b62e645e8a19e4f0181253d1663a09f2793d8c94 +F src/pager.c 59c1d41bba7dd736f27fdcf74691675aa685a048 +F src/pager.h 997a4aa3bad1638dabe90a0cbb674cc4a7b9c034 F src/parse.y 22d6a074e5f5a7258947a1dc55a9bf946b765dd0 F src/pcache.c 4121a0571c18581ee9f82f086d5e2030051ebd6a F src/pcache.h 9b559127b83f84ff76d735c8262f04853be0c59a F src/pcache1.c dab8ab930d4a73b99768d881185994f34b80ecaa -F src/pragma.c 5b255c09d6e38a37ec07830b92acceec5cab8c85 +F src/pragma.c 310939bc2fb7e6456edfb4735d004253a4b2505e F src/prepare.c 6ef0cf2f9274982988ed6b7cab1be23147e94196 F src/printf.c 19e3e81addf593195369ec8d487ed063ad3170bb F src/random.c d10c1f85b6709ca97278428fd5db5bbb9c74eece @@ -237,7 +237,7 @@ F src/resolve.c a3466128b52a86c466e47ac1a19e2174f7b5cf89 F src/rowset.c eccf6af6d620aaa4579bd3b72c1b6395d9e9fa1e F src/select.c 0cd6706fd52ae5db229e9041094db6ec27195335 F src/shell.c c00220cdd7f2027780bc25b78376c16dc24e4b7d -F src/sqlite.h.in a98b0429855f023074f44a63dba23d11469ebc6d +F src/sqlite.h.in f34298ae5de26aebfba0c5ce91590d62ddebc6cb F src/sqlite3.rc 992c9f5fb8285ae285d6be28240a7e8d3a7f2bad F src/sqlite3ext.h 17d487c3c91b0b8c584a32fbeb393f6f795eea7d F src/sqliteInt.h 3210f8bd040d1c6d8b1616325b15dd3ff749e48f @@ -307,8 +307,8 @@ F src/vdbemem.c 8b5e1083fed2da94e315858a7edf5604a5b91804 F src/vdbesort.c 09efa5e5098d1a159cd21f588eb118e4fe87cfde F src/vdbetrace.c 16d39c1ef7d1f4a3a7464bea3b7b4bdd7849c415 F src/vtab.c 019dbfd0406a7447c990e1f7bd1dfcdb8895697f -F src/wal.c 8bd0ced6cf1d3389fd6a73b4f12a1e2bf926e75a -F src/wal.h 
e25f9d383ffb07986ba20b78dbde2c1d0cb36ab6 +F src/wal.c a5dbbbd8ceccd5e2187b1e7854f359cb5efb7e3b +F src/wal.h 0d3ba0c3f1b4c25796cb213568a84b9f9063f465 F src/walker.c c253b95b4ee44b21c406e2a1052636c31ea27804 F src/where.c dc276288039fb45ce23c80e4535980f5a152d8ec F src/whereInt.h 124d970450955a6982e174b07c320ae6d62a595c @@ -1095,7 +1095,7 @@ F test/vtabF.test fd5ad376f5a34fe0891df1f3cddb4fe7c3eb077e F test/vtab_alter.test 9e374885248f69e251bdaacf480b04a197f125e5 F test/vtab_err.test 0d4d8eb4def1d053ac7c5050df3024fd47a3fbd8 F test/vtab_shared.test ea8778d5b0df200adef2ca7c00c3c37d4375f772 -F test/wal.test 885f32b2b390b30b4aa3dbb0e568f8f78d40f5cc +F test/wal.test d7bb2feeacb74d26f27ebe519334b95adf22c8ae F test/wal2.test 1f841d2048080d32f552942e333fd99ce541dada F test/wal3.test b22eb662bcbc148c5f6d956eaf94b047f7afe9c0 F test/wal4.test 4744e155cd6299c6bd99d3eab1c82f77db9cdb3c @@ -1208,7 +1208,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 0abfd78ceb09b7f7c27c688c8e3fe93268a13b32 F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 1e468fe1e408e513a1e1bbe72fe2a240f2991b3d -R 44fa742e94ebd7f402187be149604025 +P 95ffdaa542df1c28fac97422e5a4b2c5cb81d50a +R febab191432990759e75444c2ec5fc2f U dan -Z 7bef28ffbee5fdb05a4896f797ef3928 +Z 5341e3998131d826ad56eb5ef99fe7b1 diff --git a/manifest.uuid b/manifest.uuid index 788cd2449b..420e80d994 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -95ffdaa542df1c28fac97422e5a4b2c5cb81d50a \ No newline at end of file +0bf1301aacb3b717b4cc020fbda9fab0bae331c3 \ No newline at end of file diff --git a/src/main.c b/src/main.c index 231890de4b..a8f3ee67b9 100644 --- a/src/main.c +++ b/src/main.c @@ -1768,6 +1768,24 @@ int sqlite3_wal_checkpoint_v2( #endif } +int sqlite3_ckpt_open( + sqlite3 *db, + unsigned char *a, int n, + sqlite3_ckpt **ppCkpt +){ + Pager *pPager; + Btree *pBt; + int rc; + + *ppCkpt = 0; + 
sqlite3_mutex_enter(db->mutex); + pBt = db->aDb[0].pBt; + pPager = sqlite3BtreePager(pBt); + rc = sqlite3PagerWalCheckpointStart(db, pPager, a, n, ppCkpt); + sqlite3_mutex_leave(db->mutex); + return rc; +} + /* ** Checkpoint database zDb. If zDb is NULL, or if the buffer zDb points diff --git a/src/pager.c b/src/pager.c index f9eba7fd7b..09a132e120 100644 --- a/src/pager.c +++ b/src/pager.c @@ -623,6 +623,8 @@ struct PagerSavepoint { ** database file has not been modified since it was created, this variable ** is set to 2. ** +** noCkptOnClose +** ** */ struct Pager { @@ -7288,12 +7290,24 @@ int sqlite3PagerWalFramesize(Pager *pPager){ /* ** Set or clear the "OTA mode" flag. */ -int sqlite3PagerSetOtaMode(Pager *pPager, int bOta){ - if( pPager->pWal || pPager->eState!=PAGER_OPEN ){ +int sqlite3PagerSetOtaMode(Pager *pPager, int iOta){ + if( iOta==1 && (pPager->pWal || pPager->eState!=PAGER_OPEN) ){ return SQLITE_ERROR; } - pPager->otaMode = 1; + pPager->otaMode = iOta; return SQLITE_OK; } +int sqlite3PagerWalCheckpointStart( + sqlite3 *db, + Pager *pPager, + u8 *a, int n, + sqlite3_ckpt **ppCkpt +){ + return sqlite3WalCheckpointStart(db, pPager->pWal, a, n, + pPager->xBusyHandler, pPager->pBusyHandlerArg, + pPager->ckptSyncFlags, ppCkpt + ); +} + #endif /* SQLITE_OMIT_DISKIO */ diff --git a/src/pager.h b/src/pager.h index 0e928fe64c..282da5765d 100644 --- a/src/pager.h +++ b/src/pager.h @@ -209,5 +209,6 @@ void *sqlite3PagerCodec(DbPage *); int sqlite3PagerSetOtaMode(Pager *pPager, int bOta); void sqlite3PagerWalSalt(Pager *pPager, u32 *aSalt); +int sqlite3PagerWalCheckpointStart(sqlite3*, Pager*, u8*, int, sqlite3_ckpt**); #endif /* _PAGER_H_ */ diff --git a/src/pragma.c b/src/pragma.c index ee99002274..c3566e7fd9 100644 --- a/src/pragma.c +++ b/src/pragma.c @@ -900,7 +900,7 @@ void sqlite3Pragma( Btree *pBt = pDb->pBt; assert( pBt!=0 ); if( zRight ){ - int iArg = !!sqlite3Atoi(zRight); + int iArg = sqlite3Atoi(zRight); if( sqlite3BtreeIsInReadTrans(pBt) ){ 
sqlite3ErrorMsg(pParse, "cannot set pager_ota_mode with open transaction" diff --git a/src/sqlite.h.in b/src/sqlite.h.in index 2ad571ed0b..0635f1cae2 100644 --- a/src/sqlite.h.in +++ b/src/sqlite.h.in @@ -7464,6 +7464,40 @@ int sqlite3_index_writer( int **paiCol, int *pnCol /* OUT: See above */ ); +/* +** Incremental checkpoint API. +** +** An incremental checkpoint handle is opened using the sqlite3_ckpt_open() +** API. To begin a new checkpoint, the second and third arguments should both +** be passed zero. To resume an earlier checkpoint, the second and third +** arguments should specify a buffer returned by an earlier call to +** sqlite3_ckpt_close(). When resuming a checkpoint, if the database or WAL +** file has been modified since the checkpoint was suspended, the +** sqlite3_ckpt_open() call fails with SQLITE_MISMATCH. +** +** Each time sqlite3_ckpt_step() is called on an open checkpoint handle, a +** single page is copied from the WAL file to the database. If no error +** occurs, but the checkpoint is not finished, SQLITE_OK is returned. If the +** checkpoint has been finished (and so sqlite3_ckpt_step() should not be +** called again), SQLITE_DONE is returned. Otherwise, if an error occurs, +** some other SQLite error code is returned. +** +** Calling sqlite3_ckpt_close() closes an open checkpoint handle. If the +** checkpoint has finished and no error has occurred, SQLITE_OK is returned +** and the two output parameters zeroed. Or, if an error has occurred, an +** error code is returned and the two output parameters are zeroed. Finally, +** if the checkpoint is not finished but no error has occurred, SQLITE_OK is +** returned and the first output variable set to point to a buffer allocated +** using sqlite3_malloc() containing the serialized state of the checkpoint. +** The contents of this buffer may be passed to a later call to +** sqlite3_ckpt_open() to restart the checkpoint. The second output variable +** is set to the size of the buffer in bytes. 
+*/ +typedef struct sqlite3_ckpt sqlite3_ckpt; +int sqlite3_ckpt_open(sqlite3*, unsigned char *a, int n, sqlite3_ckpt **ppCkpt); +int sqlite3_ckpt_step(sqlite3_ckpt*); +int sqlite3_ckpt_close(sqlite3_ckpt*, unsigned char **pa, int *pn); + /* ** Undo the hack that converts floating point types to integer for ** builds on processors without floating point support. diff --git a/src/wal.c b/src/wal.c index c7ad2bcb2c..dcf88ff7d8 100644 --- a/src/wal.c +++ b/src/wal.c @@ -482,6 +482,24 @@ struct WalIterator { } aSegment[1]; /* One for every 32KB page in the wal-index */ }; +/* +** Context for a WAL checkpoint (see walCheckpointStart/Step/Finalize). +*/ +typedef struct WalCkpt WalCkpt; +struct WalCkpt { + sqlite3 *db; /* Database pointer (incremental only) */ + int szPage; /* Database page-size */ + int sync_flags; /* Flags for OsSync() (or 0) */ + u32 mxSafeFrame; /* Max frame that can be backfilled */ + u32 mxPage; /* Max database page to write */ + volatile WalCkptInfo *pInfo; /* The checkpoint status information */ + WalIterator *pIter; /* Wal iterator context */ + Wal *pWal; /* Pointer to owner object */ + u8 *aBuf; /* Temporary page-sized buffer to use */ + int rc; /* Error code. SQLITE_DONE -> finished */ + int nStep; /* Number of times pIter has been stepped */ +}; + /* ** Define the parameters of the hash tables in the wal-index file. 
There ** is a hash-table following every HASHTABLE_NPAGE page numbers in the @@ -1623,6 +1641,155 @@ static int walPagesize(Wal *pWal){ return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16); } +static int walCheckpointStart( + Wal *pWal, + u8 *aBuf, /* Page-sized temporary buffer */ + int nBuf, /* Size of aBuf[] in bytes */ + int (*xBusy)(void*), /* Function to call when busy (or NULL) */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags for OsSync() (or 0) */ + WalCkpt *p /* Allocated object to populate */ +){ + int rc; /* Return code */ + int i; /* Iterator variable */ + + memset(p, 0, sizeof(WalCkpt)); + + if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ + return SQLITE_CORRUPT_BKPT; + } + + p->szPage = walPagesize(pWal); + p->pWal = pWal; + p->aBuf = aBuf; + p->sync_flags = sync_flags; + testcase( p->szPage<=32768 ); + testcase( p->szPage>=65536 ); + p->pInfo = walCkptInfo(pWal); + if( p->pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK; + + /* Allocate the iterator */ + rc = walIteratorInit(pWal, &p->pIter); + if( rc!=SQLITE_OK ) return rc; + assert( p->pIter ); + + /* Compute in mxSafeFrame the index of the last frame of the WAL that is + ** safe to write into the database. Frames beyond mxSafeFrame might + ** overwrite database pages that are in use by active readers and thus + ** cannot be backfilled from the WAL. + */ + p->mxSafeFrame = pWal->hdr.mxFrame; + p->mxPage = pWal->hdr.nPage; + for(i=1; ipInfo->aReadMark[i]; + if( p->mxSafeFrame>y ){ + assert( y<=pWal->hdr.mxFrame ); + rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); + if( rc==SQLITE_OK ){ + p->pInfo->aReadMark[i] = (i==1 ? 
p->mxSafeFrame : READMARK_NOT_USED); + walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); + }else if( rc==SQLITE_BUSY ){ + p->mxSafeFrame = y; + xBusy = 0; + }else{ + walIteratorFree(p->pIter); + p->pIter = 0; + return rc; + } + } + } + + if( p->pInfo->nBackfill>=p->mxSafeFrame + || (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))!=SQLITE_OK + ){ + walIteratorFree(p->pIter); + p->pIter = 0; + } + if( rc==SQLITE_BUSY ) rc = SQLITE_OK; + + if( rc==SQLITE_OK && p->pIter ){ + /* Sync the WAL to disk */ + if( sync_flags ){ + rc = sqlite3OsSync(pWal->pWalFd, sync_flags); + } + + /* If the database may grow as a result of this checkpoint, hint + ** about the eventual size of the db file to the VFS layer. */ + if( rc==SQLITE_OK ){ + i64 nSize; /* Current size of database file */ + i64 nReq = ((i64)p->mxPage * p->szPage); + rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); + if( rc==SQLITE_OK && nSizepDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); + } + } + } + + return rc; +} + +static int walCheckpointStep(WalCkpt *p){ + u32 iDbpage = 0; /* Next database page to write */ + u32 iFrame = 0; /* Wal frame containing data for iDbpage */ + int rc = SQLITE_DONE; + + assert( p->rc==SQLITE_OK ); + while( p->pIter && 0==walIteratorNext(p->pIter, &iDbpage, &iFrame) ){ + i64 iOffset; + assert( walFramePgno(p->pWal, iFrame)==iDbpage ); + p->nStep++; + if( iFrame<=p->pInfo->nBackfill + || iFrame>p->mxSafeFrame + || iDbpage>p->mxPage + ){ + continue; + } + + iOffset = walFrameOffset(iFrame, p->szPage) + WAL_FRAME_HDRSIZE; + /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ + rc = sqlite3OsRead(p->pWal->pWalFd, p->aBuf, p->szPage, iOffset); + if( rc!=SQLITE_OK ) break; + iOffset = (iDbpage-1)*(i64)p->szPage; + testcase( IS_BIG_INT(iOffset) ); + rc = sqlite3OsWrite(p->pWal->pDbFd, p->aBuf, p->szPage, iOffset); + break; + } + + p->rc = rc; + return rc; +} + +static int walCheckpointFinalize(WalCkpt *p){ + if( p->pIter ){ + int rc = p->rc; + Wal *pWal = p->pWal; + + /* If work 
was completed */ + if( p->pIter && rc==SQLITE_DONE ){ + rc = SQLITE_OK; + if( p->mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ + i64 szDb = pWal->hdr.nPage*(i64)p->szPage; + testcase( IS_BIG_INT(szDb) ); + rc = sqlite3OsTruncate(pWal->pDbFd, szDb); + if( rc==SQLITE_OK && p->sync_flags ){ + rc = sqlite3OsSync(pWal->pDbFd, p->sync_flags); + } + } + if( rc==SQLITE_OK ){ + p->pInfo->nBackfill = p->mxSafeFrame; + } + p->rc = rc; + } + + /* Release the reader lock held while backfilling */ + walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); + walIteratorFree(p->pIter); + p->pIter = 0; + } + + return p->rc; +} + /* ** Copy as much content as we can from the WAL back into the database file ** in response to an sqlite3_wal_checkpoint() request or the equivalent. @@ -1660,120 +1827,23 @@ static int walCheckpoint( int (*xBusyCall)(void*), /* Function to call when busy */ void *pBusyArg, /* Context argument for xBusyHandler */ int sync_flags, /* Flags for OsSync() (or 0) */ - u8 *zBuf /* Temporary buffer to use */ + u8 *zBuf, /* Temporary buffer to use */ + int nBuf /* Size of zBuf in bytes */ ){ int rc; /* Return code */ - int szPage; /* Database page-size */ - WalIterator *pIter = 0; /* Wal iterator context */ - u32 iDbpage = 0; /* Next database page to write */ - u32 iFrame = 0; /* Wal frame containing data for iDbpage */ - u32 mxSafeFrame; /* Max frame that can be backfilled */ - u32 mxPage; /* Max database page to write */ - int i; /* Loop counter */ - volatile WalCkptInfo *pInfo; /* The checkpoint status information */ int (*xBusy)(void*) = 0; /* Function to call when waiting for locks */ - - szPage = walPagesize(pWal); - testcase( szPage<=32768 ); - testcase( szPage>=65536 ); - pInfo = walCkptInfo(pWal); - if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK; - - /* Allocate the iterator */ - rc = walIteratorInit(pWal, &pIter); - if( rc!=SQLITE_OK ){ - return rc; - } - assert( pIter ); + WalCkpt sC; if( eMode!=SQLITE_CHECKPOINT_PASSIVE ) xBusy = xBusyCall; + rc = 
walCheckpointStart(pWal, zBuf, nBuf, xBusy, pBusyArg, sync_flags, &sC); + if( sC.pIter==0 ) goto walcheckpoint_out; + assert( rc==SQLITE_OK ); - /* Compute in mxSafeFrame the index of the last frame of the WAL that is - ** safe to write into the database. Frames beyond mxSafeFrame might - ** overwrite database pages that are in use by active readers and thus - ** cannot be backfilled from the WAL. - */ - mxSafeFrame = pWal->hdr.mxFrame; - mxPage = pWal->hdr.nPage; - for(i=1; iaReadMark[i]; - if( mxSafeFrame>y ){ - assert( y<=pWal->hdr.mxFrame ); - rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1); - if( rc==SQLITE_OK ){ - pInfo->aReadMark[i] = (i==1 ? mxSafeFrame : READMARK_NOT_USED); - walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1); - }else if( rc==SQLITE_BUSY ){ - mxSafeFrame = y; - xBusy = 0; - }else{ - goto walcheckpoint_out; - } - } - } - - if( pInfo->nBackfillnBackfill; - - /* Sync the WAL to disk */ - if( sync_flags ){ - rc = sqlite3OsSync(pWal->pWalFd, sync_flags); - } - - /* If the database may grow as a result of this checkpoint, hint - ** about the eventual size of the db file to the VFS layer. - */ - if( rc==SQLITE_OK ){ - i64 nReq = ((i64)mxPage * szPage); - rc = sqlite3OsFileSize(pWal->pDbFd, &nSize); - if( rc==SQLITE_OK && nSizepDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq); - } - } - - - /* Iterate through the contents of the WAL, copying data to the db file. 
*/ - while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){ - i64 iOffset; - assert( walFramePgno(pWal, iFrame)==iDbpage ); - if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue; - iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE; - /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */ - rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset); - if( rc!=SQLITE_OK ) break; - iOffset = (iDbpage-1)*(i64)szPage; - testcase( IS_BIG_INT(iOffset) ); - rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset); - if( rc!=SQLITE_OK ) break; - } - - /* If work was actually accomplished... */ - if( rc==SQLITE_OK ){ - if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){ - i64 szDb = pWal->hdr.nPage*(i64)szPage; - testcase( IS_BIG_INT(szDb) ); - rc = sqlite3OsTruncate(pWal->pDbFd, szDb); - if( rc==SQLITE_OK && sync_flags ){ - rc = sqlite3OsSync(pWal->pDbFd, sync_flags); - } - } - if( rc==SQLITE_OK ){ - pInfo->nBackfill = mxSafeFrame; - } - } - - /* Release the reader lock held while backfilling */ - walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1); - } - - if( rc==SQLITE_BUSY ){ - /* Reset the return code so as not to report a checkpoint failure - ** just because there are active readers. */ - rc = SQLITE_OK; - } + /* Step the checkpoint object until it reports something other than + ** SQLITE_OK. 
*/ + while( SQLITE_OK==(rc = walCheckpointStep(&sC)) ); + if( rc==SQLITE_DONE ) rc = SQLITE_OK; + rc = walCheckpointFinalize(&sC); /* If this is an SQLITE_CHECKPOINT_RESTART operation, and the entire wal ** file has been copied into the database file, then block until all @@ -1782,10 +1852,10 @@ static int walCheckpoint( */ if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){ assert( pWal->writeLock ); - if( pInfo->nBackfillhdr.mxFrame ){ + if( sC.pInfo->nBackfillhdr.mxFrame ){ rc = SQLITE_BUSY; }else if( eMode==SQLITE_CHECKPOINT_RESTART ){ - assert( mxSafeFrame==pWal->hdr.mxFrame ); + assert( sC.mxSafeFrame==pWal->hdr.mxFrame ); rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1); if( rc==SQLITE_OK ){ walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1); @@ -1794,7 +1864,7 @@ static int walCheckpoint( } walcheckpoint_out: - walIteratorFree(pIter); + walIteratorFree(sC.pIter); return rc; } @@ -2971,11 +3041,7 @@ int sqlite3WalCheckpoint( /* Copy data from the log to the database file. */ if( rc==SQLITE_OK ){ - if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){ - rc = SQLITE_CORRUPT_BKPT; - }else{ - rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf); - } + rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf, nBuf); /* If no error occurred, set the output variables. */ if( rc==SQLITE_OK || rc==SQLITE_BUSY ){ @@ -3002,6 +3068,124 @@ int sqlite3WalCheckpoint( return (rc==SQLITE_OK && eMode!=eMode2 ? 
SQLITE_BUSY : rc); } +int sqlite3_ckpt_step(sqlite3_ckpt *pCkpt){ + int rc; + WalCkpt *p = (WalCkpt*)pCkpt; + sqlite3_mutex_enter(p->db->mutex); + rc = walCheckpointStep(p); + sqlite3_mutex_leave(p->db->mutex); + return rc; +} + +int sqlite3_ckpt_close(sqlite3_ckpt *pCkpt, u8 **paState, int *pnState){ + int rc; + WalCkpt *p = (WalCkpt*)pCkpt; + sqlite3 *db = p->db; + Wal *pWal = p->pWal; + sqlite3_mutex_enter(db->mutex); + if( paState ){ + *paState = 0; + *pnState = 0; + if( p->rc==SQLITE_OK ){ + u8 *aState = sqlite3_malloc(sizeof(u32) * 3); + if( aState==0 ){ + p->rc = SQLITE_NOMEM; + }else{ + *pnState = sizeof(u32)*3; + sqlite3Put4byte(&aState[0], p->nStep); + sqlite3Put4byte(&aState[4], p->pWal->hdr.aCksum[0]); + sqlite3Put4byte(&aState[8], p->pWal->hdr.aCksum[1]); + *paState = aState; + } + } + } + rc = walCheckpointFinalize(p); + walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); + pWal->ckptLock = 0; + sqlite3_free(p); + memset(&pWal->hdr, 0, sizeof(WalIndexHdr)); + sqlite3_mutex_leave(db->mutex); + return rc; +} + +int sqlite3WalCheckpointStart( + sqlite3 *db, /* Database connection */ + Wal *pWal, /* Wal connection */ + u8 *aState, int nState, /* Checkpoint state to restore */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags to sync db file with (or 0) */ + sqlite3_ckpt **ppCkpt /* OUT: Incremental checkpoint object */ +){ + WalCkpt *p = 0; + int isChanged = 0; + int rc; + int pgsz; + + *ppCkpt = 0; + if( pWal->readOnly ) return SQLITE_READONLY; + WALTRACE(("WAL%p: checkpoint begins\n", pWal)); + rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1); + if( rc ){ + /* Usually this is SQLITE_BUSY meaning that another thread or process + ** is already running a checkpoint, or maybe a recovery. But it might + ** also be SQLITE_IOERR. */ + return rc; + } + pWal->ckptLock = 1; + + /* Read the wal-index header. 
*/ + rc = walIndexReadHdr(pWal, &isChanged); + if( rc!=SQLITE_OK ) goto ckptstart_out; + if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){ + sqlite3OsUnfetch(pWal->pDbFd, 0, 0); + } + + pgsz = walPagesize(pWal); + p = sqlite3_malloc(sizeof(WalCkpt) + pgsz); + if( p==0 ){ + rc = SQLITE_NOMEM; + goto ckptstart_out; + } + + rc = walCheckpointStart( + pWal, (u8*)&p[1], pgsz, xBusy, pBusyArg, sync_flags, p + ); + p->db = db; + + if( rc==SQLITE_OK && aState ){ + if( nState!=sizeof(u32)*3 ){ + rc = SQLITE_CORRUPT_BKPT; + }else{ + int i; + if( pWal->hdr.aCksum[0]!=sqlite3Get4byte(&aState[4]) + || pWal->hdr.aCksum[1]!=sqlite3Get4byte(&aState[8]) + ){ + rc = SQLITE_MISMATCH; + }else{ + p->nStep = (int)sqlite3Get4byte(aState); + sqlite3Put4byte(&aState[4], pWal->hdr.aCksum[0]); + sqlite3Put4byte(&aState[8], pWal->hdr.aCksum[1]); + for(i=0; rc==SQLITE_OK && inStep; i++){ + u32 dummy1, dummy2; + rc = walIteratorNext(p->pIter, &dummy1, &dummy2); + } + } + } + } + + ckptstart_out: + if( rc!=SQLITE_OK ){ + if( p ) walIteratorFree(p->pIter); + walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1); + pWal->ckptLock = 0; + sqlite3_free(p); + p = 0; + } + *ppCkpt = (sqlite3_ckpt*)p; + return rc; +} + /* Return the value to pass to a sqlite3_wal_hook callback, the ** number of frames in the WAL at the point of the last commit since ** sqlite3WalCallback() was called. 
If no commits have occurred since diff --git a/src/wal.h b/src/wal.h index 1c6f27d8f0..748b6bc277 100644 --- a/src/wal.h +++ b/src/wal.h @@ -128,6 +128,15 @@ int sqlite3WalHeapMemory(Wal *pWal); int sqlite3WalCheckSalt(Wal *pWal, sqlite3_file*); +int sqlite3WalCheckpointStart(sqlite3 *, + Wal *pWal, /* Wal connection */ + u8 *aState, int nState, /* Checkpoint state to restore */ + int (*xBusy)(void*), /* Function to call when busy */ + void *pBusyArg, /* Context argument for xBusyHandler */ + int sync_flags, /* Flags to sync db file with (or 0) */ + sqlite3_ckpt **ppCkpt /* OUT: Incremental checkpoint object */ +); + #ifdef SQLITE_ENABLE_ZIPVFS /* If the WAL file is not empty, return the number of bytes of content ** stored in each frame (i.e. the db page-size when the WAL was created). diff --git a/test/wal.test b/test/wal.test index 675be73791..63126de58f 100644 --- a/test/wal.test +++ b/test/wal.test @@ -375,6 +375,7 @@ do_test wal-7.2 { # truncation. # do_test wal-8.1 { +breakpoint reopen_db catch { db close } forcedelete test.db test.db-wal