diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 3bf9e2376e..7e991fc21d 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -17,7 +17,7 @@ #include "fts5Int.h" -#define FTS5_DEFAULT_PAGE_SIZE 1000 +#define FTS5_DEFAULT_PAGE_SIZE 4050 #define FTS5_DEFAULT_AUTOMERGE 4 #define FTS5_DEFAULT_CRISISMERGE 16 diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index dc664aab71..e1c71cba0a 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -287,9 +287,8 @@ int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; } ** without overreading if the records are corrupt. */ #define FTS5_DATA_ZERO_PADDING 8 +#define FTS5_DATA_PADDING 20 -typedef struct Fts5BtreeIter Fts5BtreeIter; -typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel; typedef struct Fts5Data Fts5Data; typedef struct Fts5DlidxIter Fts5DlidxIter; typedef struct Fts5DlidxLvl Fts5DlidxLvl; @@ -333,6 +332,9 @@ struct Fts5Index { sqlite3_blob *pReader; /* RO incr-blob open on %_data table */ sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */ sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */ + sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */ + sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=? 
*/ + sqlite3_stmt *pIdxSelect; /* "SELECT pgno, dlidx FROM %_idx WHERE ..." */ int nRead; /* Total number of blocks read */ }; @@ -387,8 +389,7 @@ struct Fts5DlidxWriter { }; struct Fts5SegWriter { int iSegid; /* Segid to write to */ - int nWriter; /* Number of entries in aWriter */ - Fts5PageWriter *aWriter; /* Array of PageWriter objects */ + Fts5PageWriter writer; /* PageWriter object */ i64 iPrevRowid; /* Previous docid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ @@ -398,6 +399,10 @@ struct Fts5SegWriter { int nDlidx; /* Allocated size of aDlidx[] array */ Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ + + /* Values to insert into the %_idx table */ + Fts5Buffer btterm; /* First term on leaf page iBtPage */ + int iBtPage; /* Leaf page awaiting a %_idx entry, or 0 if none */ }; /* @@ -570,43 +575,6 @@ struct Fts5DlidxIter { -/* -** An Fts5BtreeIter object is used to iterate through all entries in the -** b-tree hierarchy belonging to a single fts5 segment. In this case the -** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the -** b-tree hierarchy consists of the following: -** -** iLeaf: The page number of the leaf page the entry points to. -** -** term: A split-key that all terms on leaf page $iLeaf must be greater -** than or equal to. The "term" associated with the first b-tree -** hierarchy entry (the one that points to leaf page 1) is always -** an empty string. -** -** nEmpty: The number of empty (termless) leaf pages that immediately -** following iLeaf. -** -** The Fts5BtreeIter object is only used as part of the integrity-check code. 
-*/ -struct Fts5BtreeIterLevel { - Fts5NodeIter s; /* Iterator for the current node */ - Fts5Data *pData; /* Data for the current node */ -}; -struct Fts5BtreeIter { - Fts5Index *p; /* FTS5 backend object */ - Fts5StructureSegment *pSeg; /* Iterate through this segment's b-tree */ - int nLvl; /* Size of aLvl[] array */ - Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */ - - /* Output variables */ - Fts5Buffer term; /* Current term */ - int iLeaf; /* Leaf containing terms >= current term */ - int nEmpty; /* Number of "empty" leaves following iLeaf */ - int bEof; /* Set to true at EOF */ - int bDlidx; /* True if there exists a dlidx */ -}; - - /* ** The first argument passed to this macro is a pointer to an Fts5Buffer ** object. @@ -748,7 +716,7 @@ static Fts5Data *fts5DataReadOrBuffer( rc = SQLITE_NOMEM; } }else{ - int nSpace = nByte + FTS5_DATA_ZERO_PADDING; + int nSpace = nByte + FTS5_DATA_PADDING; pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data)); if( pRet ){ pRet->n = nByte; @@ -805,6 +773,23 @@ static void fts5DataRelease(Fts5Data *pData){ sqlite3_free(pData); } +static int fts5IndexPrepareStmt( + Fts5Index *p, + sqlite3_stmt **ppStmt, + char *zSql +){ + if( p->rc==SQLITE_OK ){ + if( zSql ){ + p->rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0); + }else{ + p->rc = SQLITE_NOMEM; + } + } + sqlite3_free(zSql); + return p->rc; +} + + /* ** INSERT OR REPLACE a record into the %_data table. 
*/ @@ -814,17 +799,11 @@ static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->pWriter==0 ){ int rc = SQLITE_OK; Fts5Config *pConfig = p->pConfig; - char *zSql = sqlite3Fts5Mprintf(&rc, - "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl - ); - if( zSql ){ - rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0); - sqlite3_free(zSql); - } - if( rc!=SQLITE_OK ){ - p->rc = rc; - return; - } + fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf( + "REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)", + pConfig->zDb, pConfig->zName + )); + if( p->rc ) return; } sqlite3_bind_int64(p->pWriter, 1, iRowid); @@ -845,7 +824,8 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ int rc; Fts5Config *pConfig = p->pConfig; char *zSql = sqlite3_mprintf( - "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl + "DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?", + pConfig->zDb, pConfig->zName ); if( zSql==0 ){ rc = SQLITE_NOMEM; @@ -872,6 +852,18 @@ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0); i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1; fts5DataDelete(p, iFirst, iLast); + if( p->pIdxDeleter==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf( + "DELETE FROM '%q'.'%q_idx' WHERE segid=?", + pConfig->zDb, pConfig->zName + )); + } + if( p->rc==SQLITE_OK ){ + sqlite3_bind_int(p->pIdxDeleter, 1, iSegid); + sqlite3_step(p->pIdxDeleter); + p->rc = sqlite3_reset(p->pIdxDeleter); + } } /* @@ -2334,12 +2326,22 @@ static void fts5SegIterSeekInit( /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. 
*/ - for(h=pSeg->nHeight-1; h>0; h--){ - i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, h, iPg); - fts5DataBuffer(p, pBuf, iRowid); - if( p->rc ) break; - iPg = fts5NodeSeek(pBuf, pTerm, nTerm, &bDlidx); + if( p->pIdxSelect==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf( + "SELECT pgno, dlidx FROM '%q'.'%q_idx' WHERE " + "segid=? AND term<=? ORDER BY term DESC LIMIT 1", + pConfig->zDb, pConfig->zName + )); } + if( p->rc ) return; + sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid); + sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC); + if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){ + iPg = sqlite3_column_int(p->pIdxSelect, 0); + bDlidx = sqlite3_column_int(p->pIdxSelect, 1); + } + p->rc = sqlite3_reset(p->pIdxSelect); if( iPg<pSeg->pgnoFirst ){ iPg = pSeg->pgnoFirst; @@ -3170,52 +3172,52 @@ static int fts5WriteDlidxGrow( } /* -** If an "nEmpty" record must be written to the b-tree before the next -** term, write it now. +** If the current doclist-index accumulating in pWriter->aDlidx[] is large +** enough, flush it to disk and return 1. Otherwise discard it and return +** zero. */ -static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ - if( pWriter->nEmpty ){ - int bFlag = 0; - Fts5PageWriter *pPg; - pPg = &pWriter->aWriter[1]; +static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){ + int bFlag = 0; - /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written - ** to the database, also write the doclist-index to disk. */ - if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - bFlag = 1; - } - fts5WriteDlidxClear(p, pWriter, bFlag); - fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); - fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); - pWriter->nEmpty = 0; - }else{ - fts5WriteDlidxClear(p, pWriter, 0); + /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written + ** to the database, also write the doclist-index to disk. 
*/ + if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ + bFlag = 1; } - - assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); - assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); + fts5WriteDlidxClear(p, pWriter, bFlag); + pWriter->nEmpty = 0; + return bFlag; } -static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ +/* +** This function is called whenever processing of the doclist for the +** last term on leaf page (pWriter->iBtPage) is completed. +** +** The doclist-index for that term is currently stored in-memory within the +** Fts5SegWriter.aDlidx[] array. If it is large enough, this function +** writes it out to disk. Or, if it is too small to bother with, discards +** it. +** +** Fts5SegWriter.btterm currently contains the first term on page iBtPage. +*/ +static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){ + int bFlag; + + assert( pWriter->iBtPage || pWriter->nEmpty==0 ); + if( pWriter->iBtPage==0 ) return; + bFlag = fts5WriteFlushDlidx(p, pWriter); + if( p->rc==SQLITE_OK ){ - Fts5PageWriter *aNew; - Fts5PageWriter *pNew; - int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); - - aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); - if( aNew==0 ){ - p->rc = SQLITE_NOMEM; - return; - } - - pNew = &aNew[pWriter->nWriter]; - memset(pNew, 0, sizeof(Fts5PageWriter)); - pNew->pgno = 1; - fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); - - pWriter->nWriter++; - pWriter->aWriter = aNew; + const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:""); + /* The following was already done in fts5WriteInit(): */ + /* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */ + sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC); + sqlite3_bind_int(p->pIdxWriter, 3, pWriter->iBtPage); + sqlite3_bind_int(p->pIdxWriter, 4, bFlag); + sqlite3_step(p->pIdxWriter); + p->rc = sqlite3_reset(p->pIdxWriter); } + pWriter->iBtPage = 0; } /* @@ -3232,36 
+3234,9 @@ static void fts5WriteBtreeTerm( Fts5SegWriter *pWriter, /* Writer object */ int nTerm, const u8 *pTerm /* First term on new page */ ){ - int iHeight; - for(iHeight=1; 1; iHeight++){ - Fts5PageWriter *pPage; - - if( iHeight>=pWriter->nWriter ){ - fts5WriteBtreeGrow(p, pWriter); - if( p->rc ) return; - } - pPage = &pWriter->aWriter[iHeight]; - - fts5WriteBtreeNEmpty(p, pWriter); - - if( pPage->buf.n>=p->pConfig->pgsz ){ - /* pPage will be written to disk. The term will be written into the - ** parent of pPage. */ - i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, iHeight, pPage->pgno); - fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); - fts5BufferZero(&pPage->buf); - fts5BufferZero(&pPage->term); - fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno); - pPage->pgno++; - }else{ - int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); - fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2); - fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre); - fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre); - fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); - break; - } - } + fts5WriteFlushBtree(p, pWriter); + fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm); + pWriter->iBtPage = pWriter->writer.pgno; } /* @@ -3345,7 +3320,7 @@ static void fts5WriteDlidxAppend( if( pDlidx->bPrevValid ){ iVal = iRowid - pDlidx->iPrev; }else{ - i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); + i64 iPgno = (i==0 ? 
pWriter->writer.pgno : pDlidx[-1].pgno); assert( pDlidx->buf.n==0 ); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); @@ -3360,7 +3335,7 @@ static void fts5WriteDlidxAppend( static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; - Fts5PageWriter *pPage = &pWriter->aWriter[0]; + Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; if( pWriter->bFirstTermInPage ){ @@ -3399,7 +3374,7 @@ static void fts5WriteAppendTerm( int nTerm, const u8 *pTerm ){ int nPrefix; /* Bytes of prefix compression for term */ - Fts5PageWriter *pPage = &pWriter->aWriter[0]; + Fts5PageWriter *pPage = &pWriter->writer; assert( pPage->buf.n==0 || pPage->buf.n>4 ); if( pPage->buf.n==0 ){ @@ -3434,7 +3409,7 @@ static void fts5WriteAppendTerm( n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); } fts5WriteBtreeTerm(p, pWriter, n, pTerm); - pPage = &pWriter->aWriter[0]; + pPage = &pWriter->writer; } }else{ nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); @@ -3472,7 +3447,7 @@ static void fts5WriteAppendRowid( int nPos ){ if( p->rc==SQLITE_OK ){ - Fts5PageWriter *pPage = &pWriter->aWriter[0]; + Fts5PageWriter *pPage = &pWriter->writer; /* If this is to be the first docid written to the page, set the ** docid-pointer in the page-header. 
Also append a value to the dlidx @@ -3507,7 +3482,7 @@ static void fts5WriteAppendPoslistData( const u8 *aData, int nData ){ - Fts5PageWriter *pPage = &pWriter->aWriter[0]; + Fts5PageWriter *pPage = &pWriter->writer; const u8 *a = aData; int n = nData; @@ -3530,7 +3505,7 @@ static void fts5WriteAppendPoslistData( } static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ - fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); + fts5BufferAppendVarint(&p->rc, &pWriter->writer.buf, 0); } /* @@ -3544,8 +3519,8 @@ static void fts5WriteFinish( int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; + Fts5PageWriter *pLeaf = &pWriter->writer; if( p->rc==SQLITE_OK ){ - Fts5PageWriter *pLeaf = &pWriter->aWriter[0]; if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; *pnHeight = 0; @@ -3554,29 +3529,14 @@ static void fts5WriteFinish( fts5WriteFlushLeaf(p, pWriter); } *pnLeaf = pLeaf->pgno-1; - if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ - fts5WriteBtreeGrow(p, pWriter); - } - if( pWriter->nWriter>1 ){ - fts5WriteBtreeNEmpty(p, pWriter); - } - *pnHeight = pWriter->nWriter; - for(i=1; i<pWriter->nWriter; i++){ - Fts5PageWriter *pPg = &pWriter->aWriter[i]; - fts5DataWrite(p, - FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pPg->pgno), - pPg->buf.p, pPg->buf.n - ); - } + fts5WriteFlushBtree(p, pWriter); + *pnHeight = 0; } } - for(i=0; i<pWriter->nWriter; i++){ - Fts5PageWriter *pPg = &pWriter->aWriter[i]; - fts5BufferFree(&pPg->term); - fts5BufferFree(&pPg->buf); - } - sqlite3_free(pWriter->aWriter); + fts5BufferFree(&pLeaf->term); + fts5BufferFree(&pLeaf->buf); + fts5BufferFree(&pWriter->btterm); for(i=0; i<pWriter->nDlidx; i++){ sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); @@ -3592,48 +3552,21 @@ static void fts5WriteInit( memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; - pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); - if( fts5WriteDlidxGrow(p, pWriter, 1) ) return; - pWriter->nWriter = 1; - 
pWriter->nDlidx = 1; - pWriter->aWriter[0].pgno = 1; + fts5WriteDlidxGrow(p, pWriter, 1); + pWriter->writer.pgno = 1; pWriter->bFirstTermInPage = 1; -} + pWriter->iBtPage = 1; -static void fts5WriteInitForAppend( - Fts5Index *p, /* FTS5 backend object */ - Fts5SegWriter *pWriter, /* Writer to initialize */ - Fts5StructureSegment *pSeg /* Segment object to append to */ -){ - int nByte = pSeg->nHeight * sizeof(Fts5PageWriter); - memset(pWriter, 0, sizeof(Fts5SegWriter)); - pWriter->iSegid = pSeg->iSegid; - pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); - pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); + if( p->pIdxWriter==0 ){ + Fts5Config *pConfig = p->pConfig; + fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( + "INSERT INTO '%q'.'%q_idx'(segid,term,pgno,dlidx) VALUES(?,?,?,?)", + pConfig->zDb, pConfig->zName + )); + } if( p->rc==SQLITE_OK ){ - int pgno = 1; - int i; - pWriter->nDlidx = 1; - pWriter->nWriter = pSeg->nHeight; - pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; - for(i=pSeg->nHeight-1; i>0; i--){ - i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno); - Fts5PageWriter *pPg = &pWriter->aWriter[i]; - pPg->pgno = pgno; - fts5DataBuffer(p, &pPg->buf, iRowid); - if( p->rc==SQLITE_OK ){ - Fts5NodeIter ss; - fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss); - while( ss.aData ) fts5NodeIterNext(&p->rc, &ss); - fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p); - pgno = ss.iChild; - fts5NodeIterFree(&ss); - } - } - assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); - pWriter->bFirstTermInPage = 1; - assert( pWriter->aWriter[0].term.n==0 ); + sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } @@ -3673,7 +3606,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; - fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid); + 
fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } @@ -3720,8 +3653,11 @@ static void fts5IndexMergeLevel( pLvlOut = &pStruct->aLevel[iLvl+1]; assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; - fts5WriteInitForAppend(p, &writer, &pLvlOut->aSeg[pLvlOut->nSeg-1]); pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; + + fts5WriteInit(p, &writer, pSeg->iSegid); + writer.writer.pgno = pSeg->pgnoLast+1; + writer.iBtPage = 0; }else{ int iSegid = fts5AllocateSegid(p, pStruct); @@ -3812,7 +3748,7 @@ static void fts5IndexMergeLevel( pStruct->nSegment--; } }else{ - assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); + assert( pSeg->pgnoLast>0 ); fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } @@ -3987,7 +3923,7 @@ static void fts5FlushOneHash(Fts5Index *p){ /* Pre-allocate the buffer used to assemble leaf pages to the target ** page size. */ assert( pgsz>0 ); - pBuf = &writer.aWriter[0].buf; + pBuf = &writer.writer.buf; fts5BufferGrow(&p->rc, pBuf, pgsz + 20); /* Begin scanning through hash table entries. This loop runs once for each @@ -4011,7 +3947,7 @@ static void fts5FlushOneHash(Fts5Index *p){ ** flush the leaf to disk here. 
*/ if( (pBuf->n + nTerm + 2) > pgsz ){ fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.aWriter[0].buf; + pBuf = &writer.writer.buf; if( (nTerm + 32) > pBuf->nSpace ){ fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); if( p->rc ) break; @@ -4028,10 +3964,10 @@ static void fts5FlushOneHash(Fts5Index *p){ }else{ fts5PutU16(&pBuf->p[2], pBuf->n); writer.bFirstTermInPage = 0; - if( writer.aWriter[0].pgno!=1 ){ + if( writer.writer.pgno!=1 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); - pBuf = &writer.aWriter[0].buf; + pBuf = &writer.writer.buf; assert( nPre0 && writer.aDlidx[0].buf.n==0 ); - writer.aDlidx[0].pgno = writer.aWriter[0].pgno; + writer.aDlidx[0].pgno = writer.writer.pgno; if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ @@ -4100,7 +4036,7 @@ static void fts5FlushOneHash(Fts5Index *p){ iPos += n; if( pBuf->n>=pgsz ){ fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.aWriter[0].buf; + pBuf = &writer.writer.buf; } if( iPos>=nCopy ) break; } @@ -4134,7 +4070,6 @@ static void fts5FlushOneHash(Fts5Index *p){ fts5StructurePromote(p, 0, pStruct); } - fts5IndexAutomerge(p, &pStruct, pgnoLast); fts5IndexCrisismerge(p, &pStruct); fts5StructureWrite(p, pStruct); @@ -4562,6 +4497,12 @@ int sqlite3Fts5IndexOpen( rc = sqlite3Fts5CreateTable( pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr ); + if( rc==SQLITE_OK ){ + rc = sqlite3Fts5CreateTable(pConfig, "idx", + "segid, term, pgno, dlidx, PRIMARY KEY(segid, term)", + 1, pzErr + ); + } if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexReinit(p); } @@ -4585,6 +4526,9 @@ int sqlite3Fts5IndexClose(Fts5Index *p){ assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); + sqlite3_finalize(p->pIdxWriter); + sqlite3_finalize(p->pIdxDeleter); + sqlite3_finalize(p->pIdxSelect); sqlite3Fts5HashFree(p->pHash); sqlite3Fts5BufferFree(&p->scratch); sqlite3_free(p->zDataTbl); 
@@ -4934,92 +4878,6 @@ static u64 fts5IndexEntryCksum( return ret; } -static void fts5BtreeIterInit( - Fts5Index *p, - Fts5StructureSegment *pSeg, - Fts5BtreeIter *pIter -){ - int nByte; - int i; - nByte = sizeof(pIter->aLvl[0]) * (pSeg->nHeight-1); - memset(pIter, 0, sizeof(*pIter)); - if( nByte ){ - pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte); - } - if( p->rc==SQLITE_OK ){ - pIter->nLvl = pSeg->nHeight-1; - pIter->p = p; - pIter->pSeg = pSeg; - } - for(i=0; p->rc==SQLITE_OK && i<pIter->nLvl; i++){ - i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, i+1, 1); - Fts5Data *pData; - pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); - if( pData ){ - fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s); - } - } - - if( pIter->nLvl==0 || p->rc ){ - pIter->bEof = 1; - pIter->iLeaf = pSeg->pgnoLast; - }else{ - pIter->nEmpty = pIter->aLvl[0].s.nEmpty; - pIter->iLeaf = pIter->aLvl[0].s.iChild; - pIter->bDlidx = pIter->aLvl[0].s.bDlidx; - } -} - -static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ - Fts5Index *p = pIter->p; - int i; - - assert( pIter->bEof==0 && pIter->aLvl[0].s.aData ); - for(i=0; i<pIter->nLvl && p->rc==SQLITE_OK; i++){ - Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; - fts5NodeIterNext(&p->rc, &pLvl->s); - if( pLvl->s.aData ){ - fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p); - break; - }else{ - fts5NodeIterFree(&pLvl->s); - fts5DataRelease(pLvl->pData); - pLvl->pData = 0; - } - } - if( i==pIter->nLvl || p->rc ){ - pIter->bEof = 1; - }else{ - int iSegid = pIter->pSeg->iSegid; - for(i--; i>=0; i--){ - Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; - i64 iRowid = FTS5_SEGMENT_ROWID(iSegid, i+1, pLvl[1].s.iChild); - pLvl->pData = fts5DataRead(p, iRowid); - if( pLvl->pData ){ - fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); - } - } - } - - pIter->nEmpty = pIter->aLvl[0].s.nEmpty; - pIter->bDlidx = pIter->aLvl[0].s.bDlidx; - pIter->iLeaf = pIter->aLvl[0].s.iChild; -} - -static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ - int 
i; - for(i=0; i<pIter->nLvl; i++){ - Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; - fts5NodeIterFree(&pLvl->s); - if( pLvl->pData ){ - fts5DataRelease(pLvl->pData); - pLvl->pData = 0; - } - } - sqlite3_free(pIter->aLvl); - fts5BufferFree(&pIter->term); -} - #ifdef SQLITE_DEBUG /* ** This function is purely an internal test. It does not contribute to @@ -5167,33 +5025,74 @@ static void fts5TestTerm( # define fts5TestTerm(u,v,w,x,y,z) #endif +/* +** Check that: +** +** 1) All leaves of pSeg between iFirst and iLast (inclusive) exist and +** contain zero terms. +** 2) All leaves of pSeg between iNoRowid and iLast (inclusive) exist and +** contain zero rowids. +*/ +static void fts5IndexIntegrityCheckEmpty( + Fts5Index *p, + Fts5StructureSegment *pSeg, /* Segment to check internal consistency */ + int iFirst, + int iNoRowid, + int iLast +){ + int i; + + /* Check that leaves iFirst..iLast (a) exist and (b) contain no terms, + ** and that those numbered iNoRowid or above contain no rowids. */ + for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ + Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i)); + if( pLeaf ){ + if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT; + if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT; + } + fts5DataRelease(pLeaf); + if( p->rc ) break; + } +} + static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ - Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ + Fts5Config *pConfig = p->pConfig; + sqlite3_stmt *pStmt = 0; + int rc2; + int iIdxPrevLeaf = pSeg->pgnoFirst-1; + int iDlidxPrevLeaf = pSeg->pgnoLast; if( pSeg->pgnoFirst==0 ) return; + fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf( + "SELECT segid, term, pgno, dlidx FROM '%q'.'%q_idx' WHERE segid=%d", + pConfig->zDb, pConfig->zName, pSeg->iSegid + )); + /* Iterate through the b-tree hierarchy. 
*/ - for(fts5BtreeIterInit(p, pSeg, &iter); - p->rc==SQLITE_OK && iter.bEof==0; - fts5BtreeIterNext(&iter) - ){ + while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ + int nIdxTerm = sqlite3_column_bytes(pStmt, 1); + const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); + int iIdxLeaf = sqlite3_column_int(pStmt, 2); + int bIdxDlidx = sqlite3_column_int(pStmt, 3); + /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ - if( iter.iLeaf<pSeg->pgnoFirst ) continue; - iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iter.iLeaf); + if( iIdxLeaf<pSeg->pgnoFirst ) continue; + iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; /* Check that the leaf contains at least one term, and that it is equal - ** to or larger than the split-key in iter.term. Also check that if there + ** to or larger than the split-key in zIdxTerm. Also check that if there ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. */ iOff = fts5GetU16(&pLeaf->p[2]); @@ -5209,8 +5108,8 @@ static void fts5IndexIntegrityCheckSegment( p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); - res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); - if( res==0 ) res = nTerm - iter.term.n; + res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm)); + if( res==0 ) res = nTerm - nIdxTerm; if( res<0 ) p->rc = FTS5_CORRUPT; } } @@ -5220,23 +5119,20 @@ static void fts5IndexIntegrityCheckSegment( /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. 
*/ - for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ - pLeaf = fts5DataRead(p, iRow+i); - if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ - p->rc = FTS5_CORRUPT; - } - fts5DataRelease(pLeaf); - } + fts5IndexIntegrityCheckEmpty( + p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1 + ); + if( p->rc ) break; /* If there is a doclist-index, check that it looks right. */ - if( iter.bDlidx ){ + if( bIdxDlidx ){ Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ - int iPrevLeaf = iter.iLeaf; + int iPrevLeaf = iIdxLeaf; int iSegid = pSeg->iSegid; int iPg; i64 iKey; - for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); + for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ @@ -5269,26 +5165,26 @@ static void fts5IndexIntegrityCheckSegment( } } - for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); - pLeaf = fts5DataRead(p, iKey); - if( pLeaf ){ - if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; - fts5DataRelease(pLeaf); - } - } - + iDlidxPrevLeaf = iPg; fts5DlidxIterFree(pDlidx); - fts5TestDlidxReverse(p, iSegid, iter.iLeaf); + fts5TestDlidxReverse(p, iSegid, iIdxLeaf); + }else{ + iDlidxPrevLeaf = pSeg->pgnoLast; + /* TODO: Check there is no doclist index */ } + + iIdxPrevLeaf = iIdxLeaf; } + rc2 = sqlite3_finalize(pStmt); + if( p->rc==SQLITE_OK ) p->rc = rc2; + /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ +#if 0 if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } - - fts5BtreeIterFree(&iter); +#endif } diff --git a/ext/fts5/fts5_storage.c b/ext/fts5/fts5_storage.c index da822ffad2..1ddbb7940a 100644 --- a/ext/fts5/fts5_storage.c +++ b/ext/fts5/fts5_storage.c @@ -180,8 +180,10 @@ static int fts5ExecPrintf( int sqlite3Fts5DropAll(Fts5Config *pConfig){ int rc = fts5ExecPrintf(pConfig->db, 0, "DROP TABLE IF EXISTS %Q.'%q_data';" + "DROP TABLE IF EXISTS %Q.'%q_idx';" "DROP 
TABLE IF EXISTS %Q.'%q_config';", pConfig->zDb, pConfig->zName, + pConfig->zDb, pConfig->zName, pConfig->zDb, pConfig->zName ); if( rc==SQLITE_OK && pConfig->bColumnsize ){ @@ -218,6 +220,7 @@ int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){ int rc = sqlite3Fts5StorageSync(pStorage, 1); fts5StorageRenameOne(pConfig, &rc, "data", zName); + fts5StorageRenameOne(pConfig, &rc, "idx", zName); fts5StorageRenameOne(pConfig, &rc, "config", zName); if( pConfig->bColumnsize ){ fts5StorageRenameOne(pConfig, &rc, "docsize", zName); diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index 70e086e8c9..39be723c32 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -27,6 +27,7 @@ do_execsql_test 1.0 { } { t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)} t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)} + t1_idx {CREATE TABLE 't1_idx'(segid, term, pgno, dlidx, PRIMARY KEY(segid, term)) WITHOUT ROWID} t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)} t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)} t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID} @@ -47,9 +48,10 @@ do_execsql_test 2.0 { do_execsql_test 2.1 { INSERT INTO t1 VALUES('a b c', 'd e f'); } + do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } -} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=1 leaves=1..1}}}/} +} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/} foreach w {a b c d e f} { do_execsql_test 2.3.$w.asc { diff --git a/ext/fts5/test/fts5content.test b/ext/fts5/test/fts5content.test index f87aa3d947..69e66a54f8 100644 --- a/ext/fts5/test/fts5content.test +++ b/ext/fts5/test/fts5content.test @@ -247,7 +247,7 @@ reset_db do_execsql_test 6.1 { CREATE VIRTUAL TABLE xx USING fts5(x, y, content=""); SELECT name FROM sqlite_master; -} {xx xx_data xx_docsize xx_config} +} {xx xx_data xx_idx xx_docsize 
xx_config} do_execsql_test 6.2 { DROP TABLE xx; SELECT name FROM sqlite_master; diff --git a/manifest b/manifest index 824a4c2997..4cdbff045d 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\ssome\sharmless\scompiler\swarnings. -D 2015-07-15T18:35:54.200 +C Use\sa\sWITHOUT\sROWID\stable\sto\sindex\sfts5\sbtree\sleaves.\sThis\sis\sfaster\sto\squery\sand\sonly\sslightly\slarger\sthan\sstoring\sbtree\snodes\swithin\san\sintkey\stable. +D 2015-07-15T19:46:02.242 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in 6e8af213d49e6325bf283ebed7662254f8e15bda F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -109,12 +109,12 @@ F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba F ext/fts5/fts5Int.h 8d9bce1847a10df2e4ed9492ea4f3868276748fb F ext/fts5/fts5_aux.c 044cb176a815f4388308738437f6e130aa384fb0 F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015 -F ext/fts5/fts5_config.c b2456e9625bca41c51d54c363e369c6356895c90 +F ext/fts5/fts5_config.c fdfa63ae8e527ecfaa50f94063c610429cc887cf F ext/fts5/fts5_expr.c d2e148345639c5a5583e0daa39a639bf298ae6a7 F ext/fts5/fts5_hash.c 219f4edd72e5cf95b19c33f1058809a18fad5229 -F ext/fts5/fts5_index.c cfd41d49591e4e4ce2a5f84de35512f59fbb360d +F ext/fts5/fts5_index.c 7fe8e8afdb872b55726263b2a82288ebabda969c F ext/fts5/fts5_main.c 8f279999deb204b0c7760464f60f88666046398b -F ext/fts5/fts5_storage.c 1c35a38a564ee9cadcbd7ae0b13a806bdda722bd +F ext/fts5/fts5_storage.c 877399c557f273a725b5e4fc26f07e67ca90570a F ext/fts5/fts5_tcl.c 85eb4e0d0fefa9420b78151496ad4599a1783e20 F ext/fts5/fts5_tokenize.c 30f97a8c74683797b4cd233790444fbefb3b0708 F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c @@ -123,7 +123,7 @@ F ext/fts5/fts5_vocab.c 4e268a3fcbc099e50e335a1135be985a41ff6f7f F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl 
e0b4a846a7670f6232a644ece69ef25a5c19c0e8 -F ext/fts5/test/fts5aa.test 4e896b9154764fed48179a87ba0bdf3650d7f49d +F ext/fts5/test/fts5aa.test 8dac4216e5ad8fd240cff078d1893520bb1f5fb2 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c F ext/fts5/test/fts5ad.test b2edee8b7de0c21d2c88f8a18c195034aad6952d @@ -142,7 +142,7 @@ F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb F ext/fts5/test/fts5columnsize.test 97dc6bd66c91009d00407aa078dd5e9e8eb22f99 F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b -F ext/fts5/test/fts5content.test d0d90a45f0bcf07d75d474500d81f941b45e2021 +F ext/fts5/test/fts5content.test 9a952c95518a14182dc3b59e3c8fa71cda82a4e1 F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6 F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3 @@ -1365,7 +1365,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P b522c95ddcd7046dca756f4d1a1e90c34dbcab64 -R 5ae06ee700ba79856dd8d83789b8e902 -U drh -Z c7acc62193fe6f94b78724b3b176c9a0 +P 110cd84f5e842c4dcd9b9398cea211e25f36b3aa +R 6be01aa6bafcfd1604a2d6d0b1df9b9e +T *branch * fts5-btree-index +T *sym-fts5-btree-index * +T -sym-trunk * +U dan +Z 0f1494b26034248995977524e0d0424a diff --git a/manifest.uuid b/manifest.uuid index 3c8bff2494..856beb149f 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -110cd84f5e842c4dcd9b9398cea211e25f36b3aa \ No newline at end of file +862418e3506d4b7cca9c44d58c2eb9dc915d75c9 \ No newline at end of file