From e386a1ba25dcb607fd740619c8f14eb4257ecb63 Mon Sep 17 00:00:00 2001 From: dan Date: Sat, 5 Sep 2015 19:52:08 +0000 Subject: [PATCH 01/10] Experiment with a different fts5 leaf page format that allows faster seeks. FossilOrigin-Name: a1f4c3b543eed84e808f6b901a38179786fffe16 --- ext/fts5/fts5_index.c | 328 +++++++++++++++++++++++++--------- ext/fts5/test/fts5aa.test | 7 +- ext/fts5/test/fts5ad.test | 3 + ext/fts5/test/fts5simple.test | 74 ++++++++ ext/fts5/tool/loadfts5.tcl | 1 + manifest | 26 +-- manifest.uuid | 2 +- test/permutations.test | 2 +- 8 files changed, 341 insertions(+), 102 deletions(-) create mode 100644 ext/fts5/test/fts5simple.test diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 5bf4feba93..7fa5a0fae4 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -303,7 +303,8 @@ typedef struct Fts5StructureSegment Fts5StructureSegment; struct Fts5Data { u8 *p; /* Pointer to buffer containing record */ - int n; /* Size of record in bytes */ + int nn; /* Size of record in bytes */ + int szLeaf; /* Size of leaf without page-index */ }; /* @@ -377,7 +378,8 @@ struct Fts5Structure { */ struct Fts5PageWriter { int pgno; /* Page number for this page */ - Fts5Buffer buf; /* Buffer containing page data */ + Fts5Buffer buf; /* Buffer containing leaf data */ + Fts5Buffer pgidx; /* Buffer containing page-index */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5DlidxWriter { @@ -392,6 +394,7 @@ struct Fts5SegWriter { i64 iPrevRowid; /* Previous rowid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ + /* TODO1: Can use (writer.pgidx.n==0) instead of bFirstTermInPage */ u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of contiguous term-less nodes */ @@ -500,10 +503,27 @@ struct Fts5SegIter { int bDel; /* True if the delete flag is set */ }; +/* +** Argument is a pointer to an Fts5Data structure that contains a +** leaf page. +*/ +#define ASSERT_SZLEAF_OK(x) assert( \ + (x)->szLeaf==fts5GetU16(&(x)->p[2]) || (x)->szLeaf==(x)->nn \ +) + #define FTS5_SEGITER_ONETERM 0x01 #define FTS5_SEGITER_REVERSE 0x02 +/* +** Argument is a pointer to an Fts5Data structure that contains a leaf +** page. This macro evaluates to true if the leaf contains no terms, or +** false if it contains at least one term. +*/ +#define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) + +#define fts5LeafFirstTermOff(x) (fts5GetU16(&x->p[(x)->szLeaf])) + /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. @@ -679,7 +699,7 @@ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ int nAlloc = sizeof(Fts5Data) + nByte + FTS5_DATA_PADDING; pRet = (Fts5Data*)sqlite3_malloc(nAlloc); if( pRet ){ - pRet->n = nByte; + pRet->nn = nByte; aOut = pRet->p = (u8*)&pRet[1]; }else{ rc = SQLITE_NOMEM; @@ -691,6 +711,9 @@ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ if( rc!=SQLITE_OK ){ sqlite3_free(pRet); pRet = 0; + }else{ + /* TODO1: Fix this */ + pRet->szLeaf = fts5GetU16(&pRet->p[2]); } } p->rc = rc; @@ -974,8 +997,9 @@ static Fts5Structure *fts5StructureRead(Fts5Index *p){ pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( p->rc ) return 0; - memset(&pData->p[pData->n], 0, FTS5_DATA_PADDING); - p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); + /* TODO: Do we need this if the leaf-index is appended? Probably... */ + memset(&pData->p[pData->nn], 0, FTS5_DATA_PADDING); + p->rc = fts5StructureDecode(pData->p, pData->nn, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } @@ -1178,11 +1202,11 @@ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ pLvl->iFirstOff = pLvl->iOff; }else{ int iOff; - for(iOff=pLvl->iOff; iOffn; iOff++){ + for(iOff=pLvl->iOff; iOffnn; iOff++){ if( pData->p[iOff] ) break; } - if( iOffn ){ + if( iOffnn ){ i64 iVal; pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); @@ -1470,7 +1494,8 @@ static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ int iOff = pIter->iLeafOffset; /* Offset to read at */ - if( iOff>=pIter->pLeaf->n ){ + ASSERT_SZLEAF_OK(pIter->pLeaf); + if( iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ const u8 *a = &pIter->pLeaf->p[iOff]; @@ -1483,7 +1508,8 @@ static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; - if( iOff>=pIter->pLeaf->n ){ + ASSERT_SZLEAF_OK(pIter->pLeaf); + if( iOff>=pIter->pLeaf->szLeaf ){ fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ){ if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; @@ -1559,7 +1585,8 @@ static void fts5SegIterInit( if( p->rc==SQLITE_OK ){ u8 *a = pIter->pLeaf->p; - pIter->iLeafOffset = fts5GetU16(&a[2]); + pIter->iLeafOffset = fts5GetU16(&a[pIter->pLeaf->szLeaf]); + assert( pIter->iLeafOffset==4 ); fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } @@ -1581,11 +1608,12 @@ static void fts5SegIterInit( ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ - int n = pIter->pLeaf->n; + int n = pIter->pLeaf->szLeaf; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; + ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; int nPos; @@ -1633,7 +1661,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ )); if( pNew ){ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ - if( pIter->iTermLeafOffsetn ){ + if( pIter->iTermLeafOffsetszLeaf ){ pIter->pLeaf = pNew; pIter->iLeafOffset = pIter->iTermLeafOffset; } @@ -1712,8 +1740,9 @@ static void fts5SegIterNext( /* Search for the end of the position list within the current page. */ u8 *a = pLeaf->p; - int n = pLeaf->n; + int n = pLeaf->szLeaf; + ASSERT_SZLEAF_OK(pLeaf); iOff = pIter->iLeafOffset + pIter->nPos; if( iOff=n ){ fts5SegIterNextPage(p, pIter); pIter->iLeafOffset = 4; - }else if( iOff!=fts5GetU16(&a[2]) ){ + }else if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); } }else{ @@ -1745,7 +1774,8 @@ static void fts5SegIterNext( pIter->pLeaf = 0; }else{ pIter->pLeaf->p = (u8*)pList; - pIter->pLeaf->n = nList; + pIter->pLeaf->nn = nList; + pIter->pLeaf->szLeaf = nList; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } @@ -1756,15 +1786,17 @@ static void fts5SegIterNext( fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; - if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOffn ){ + ASSERT_SZLEAF_OK(pLeaf); + if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOffszLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } - else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ + else if( pLeaf->nn>pLeaf->szLeaf ){ + iOff = fts5GetU16(&pLeaf->p[pLeaf->szLeaf]); pIter->iLeafOffset = iOff; bNewTerm = 1; } - if( iOff>=pLeaf->n ){ + if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; return; } @@ -1819,7 +1851,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ /* Search for a new term within the current leaf. If one can be found, ** then this page contains the largest rowid for the current term. */ - while( iOffn ){ + while( iOffszLeaf ){ int nPos; i64 iDelta; int bDummy; @@ -1827,7 +1859,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ /* Read the position-list size field */ iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); iOff += nPos; - if( iOff>=pLeaf->n ) break; + if( iOff>=pLeaf->szLeaf ) break; /* Rowid delta. Or, if 0x00, the end of doclist marker. */ nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); @@ -1838,7 +1870,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ - if( iOff>=pLeaf->n ){ + if( iOff>=pLeaf->szLeaf ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; @@ -1848,14 +1880,15 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ - int iRowid, iTerm; - fts5LeafHeader(pNew, &iRowid, &iTerm); + int iRowid, bTermless; + iRowid = fts5GetU16(pNew->p); + bTermless = fts5LeafIsTermless(pNew); if( iRowid ){ SWAPVAL(Fts5Data*, pNew, pLast); pgnoLast = pgno; } fts5DataRelease(pNew); - if( iTerm ) break; + if( bTermless==0 ) break; } } } @@ -1903,7 +1936,7 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ ** term. */ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ int iOff = pIter->iLeafOffset + pIter->nPos; - while( iOffn ){ + while( iOffszLeaf ){ int bDummy; int nPos; i64 iDelta; @@ -1911,7 +1944,7 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ /* iOff is currently the offset of the start of position list data */ iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) return; - assert_nc( iOffn ); + assert_nc( iOffszLeaf ); iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); iOff += nPos; } @@ -1955,16 +1988,18 @@ static void fts5LeafSeek( ){ int iOff; const u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; + int n = pIter->pLeaf->szLeaf; int nMatch = 0; int nKeep = 0; int nNew = 0; + int iTerm = 0; + int nPgTerm = (pIter->pLeaf->nn - pIter->pLeaf->szLeaf) >> 1; assert( p->rc==SQLITE_OK ); assert( pIter->pLeaf ); - iOff = fts5GetU16(&a[2]); + iOff = fts5GetU16(&a[n]); if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; return; @@ -2001,6 +2036,7 @@ static void fts5LeafSeek( } iOff += nNew; +#if 0 /* Skip past the doclist. If the end of the page is reached, bail out. */ while( 1 ){ int nPos; @@ -2023,6 +2059,19 @@ static void fts5LeafSeek( } }; + iTerm++; + assert( iTerm=nPgTerm ){ + iOff = n; + break; + } + iOff = fts5GetU16(&a[n + iTerm*2]); +#endif + /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); } @@ -2037,9 +2086,9 @@ static void fts5LeafSeek( fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; - iOff = fts5GetU16(&a[2]); - if( iOff ){ - if( iOff<4 || iOff>=n ){ + if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ + iOff = fts5LeafFirstTermOff(pIter->pLeaf); + if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ nKeep = 0; @@ -2190,7 +2239,7 @@ static void fts5SegIterHashInit( pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; pLeaf->p = (u8*)pList; - pLeaf->n = nList; + pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); @@ -2383,7 +2432,7 @@ static void fts5SegIterGotoPage( if( p->rc==SQLITE_OK ){ int iOff; u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->n; + int n = pIter->pLeaf->szLeaf; iOff = fts5GetU16(&a[0]); if( iOff<4 || iOff>=n ){ @@ -2717,7 +2766,7 @@ static void fts5MultiIterNew2( Fts5SegIter *pIter = &pNew->aSeg[1]; pIter->flags = FTS5_SEGITER_ONETERM; - if( pData->n>0 ){ + if( pData->szLeaf>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); pNew->aFirst[1].iFirst = 1; @@ -2797,7 +2846,7 @@ static void fts5ChunkIterate( int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; - int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset); + int nChunk = MIN(nRem, pSeg->pLeaf->szLeaf - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; @@ -2816,7 +2865,7 @@ static void fts5ChunkIterate( pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; - nChunk = MIN(nRem, pData->n - 4); + nChunk = MIN(nRem, pData->szLeaf - 4); if( pgno==pgnoSave ){ assert( pSeg->pNextLeaf==0 ); pSeg->pNextLeaf = pData; @@ -3102,18 +3151,28 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ Fts5PageWriter *pPage = &pWriter->writer; i64 iRowid; + assert( (pPage->pgidx.n==0)==(pWriter->bFirstTermInPage) ); + + /* Set the szLeaf header field. */ + assert( 0==fts5GetU16(&pPage->buf.p[2]) ); + fts5PutU16(&pPage->buf.p[2], pPage->buf.n); + if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ - assert( 0==fts5GetU16(&pPage->buf.p[2]) ); + assert( pPage->pgidx.n==0 ); fts5WriteBtreeNoTerm(p, pWriter); + }else{ + /* Append the pgidx to the page buffer. Set the szLeaf header field. */ + fts5BufferAppendBlob(&p->rc, &pPage->buf, pPage->pgidx.n, pPage->pgidx.p); } - /* Write the current page to the db. */ + /* Write the page out to disk */ iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); + fts5BufferZero(&pPage->pgidx); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; @@ -3139,6 +3198,7 @@ static void fts5WriteAppendTerm( ){ int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->writer; + Fts5Buffer *pPgidx = &pWriter->writer.pgidx; assert( pPage->buf.n==0 || pPage->buf.n>4 ); if( pPage->buf.n==0 ){ @@ -3149,10 +3209,16 @@ static void fts5WriteAppendTerm( } if( p->rc ) return; + /* TODO1: Can this be consolidated with FlushOneHash version? */ + fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); + pPgidx->n += 2; + if( pWriter->bFirstTermInPage ){ /* Update the "first term" field of the page header. */ +#if 0 assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); fts5PutU16(&pPage->buf.p[2], pPage->buf.n); +#endif nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in @@ -3196,7 +3262,7 @@ static void fts5WriteAppendTerm( pWriter->aDlidx[0].pgno = pPage->pgno; /* If the current leaf page is full, flush it to disk. */ - if( pPage->buf.n>=p->pConfig->pgsz ){ + if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } @@ -3234,7 +3300,7 @@ static void fts5WriteAppendRowid( fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); - if( pPage->buf.n>=p->pConfig->pgsz ){ + if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } @@ -3251,8 +3317,10 @@ static void fts5WriteAppendPoslistData( int n = nData; assert( p->pConfig->pgsz>0 ); - while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){ - int nReq = p->pConfig->pgsz - pPage->buf.n; + while( p->rc==SQLITE_OK + && (pPage->buf.n + pPage->pgidx.n + n)>=p->pConfig->pgsz + ){ + int nReq = p->pConfig->pgsz - pPage->buf.n - pPage->pgidx.n; int nCopy = 0; while( nCopyterm); fts5BufferFree(&pLeaf->buf); + fts5BufferFree(&pLeaf->pgidx); fts5BufferFree(&pWriter->btterm); for(i=0; inDlidx; i++){ @@ -3321,6 +3390,7 @@ static void fts5WriteInit( pWriter->bFirstTermInPage = 1; pWriter->iBtPage = 1; + fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, p->pConfig->pgsz + 20); if( p->pIdxWriter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( @@ -3334,6 +3404,51 @@ static void fts5WriteInit( } } +/* +** The buffer passed as the second argument contains a leaf page that is +** missing its page-idx array. The first term is guaranteed to start at +** byte offset 4 of the buffer. The szLeaf field of the leaf page header +** is already populated. +** +** This function appends a page-index to the buffer. The buffer is +** guaranteed to be large enough to fit the page-index. +*/ +static void fts5MakePageidx(Fts5Index *p, Fts5Buffer *pBuf){ + if( p->rc==SQLITE_OK ){ + u8 *a = pBuf->p; + int szLeaf = pBuf->n; + int iOff = 4; + int nTerm; + + fts5PutU16(&pBuf->p[pBuf->n], iOff); + pBuf->n += 2; + fts5IndexGetVarint32(a, iOff, nTerm); + iOff += nTerm; + + while( iOff=szLeaf ) break; + + /* Skip past position list */ + fts5IndexGetVarint32(a, iOff, nTerm); + iOff += (nTerm >> 1); + + if( iOff>=(szLeaf-2) ) break; + + /* If this is the end of the doclist, break out of the loop */ + if( a[iOff]==0x00 ){ + iOff++; + fts5PutU16(&pBuf->p[pBuf->n], iOff); + pBuf->n += 2; + fts5IndexGetVarint32(a, iOff, nTerm); + fts5IndexGetVarint32(a, iOff, nTerm); + iOff += nTerm; + } + } + } +} + /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental @@ -3358,16 +3473,25 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; - u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04}; + u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno); pData = fts5DataRead(p, iLeafRowid); if( pData ){ fts5BufferZero(&buf); + fts5BufferGrow(&p->rc, &buf, pData->nn); fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); - fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); + fts5BufferAppendBlob(&p->rc, &buf, pData->szLeaf-iOff, &pData->p[iOff]); + if( p->rc==SQLITE_OK ){ + /* Set the szLeaf field */ + fts5PutU16(&buf.p[2], buf.n); + } + + /* Set up the new page-index array */ + fts5MakePageidx(p, &buf); + fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid); @@ -3679,6 +3803,7 @@ static void fts5FlushOneHash(Fts5Index *p){ Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ + Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ const u8 *zPrev = 0; Fts5SegWriter writer; @@ -3688,6 +3813,7 @@ static void fts5FlushOneHash(Fts5Index *p){ ** page size. */ assert( pgsz>0 ); pBuf = &writer.writer.buf; + pPgidx = &writer.writer.pgidx; fts5BufferGrow(&p->rc, pBuf, pgsz + 20); /* Begin scanning through hash table entries. This loop runs once for each @@ -3707,11 +3833,11 @@ static void fts5FlushOneHash(Fts5Index *p){ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); nTerm = strlen(zTerm); - /* Decide if the term will fit on the current leaf. If it will not, - ** flush the leaf to disk here. */ - if( pBuf->n>4 && (pBuf->n + nTerm + 2) > pgsz ){ + /* Decide if the term will fit on the current leaf. If it will not, + ** flush the leaf to disk here. + ** TODO1: Is this calculation still correct? */ + if( pBuf->n>4 && (pBuf->n + nTerm + 2 + pPgidx->n + 2) > pgsz ){ fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.writer.buf; if( (nTerm + 32) > pBuf->nSpace ){ fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); if( p->rc ) break; @@ -3721,12 +3847,15 @@ static void fts5FlushOneHash(Fts5Index *p){ /* Write the term to the leaf. And if it is the first on the leaf, and ** the leaf is not page number 1, push it up into the b-tree hierarchy ** as well. */ + + /* TODO1: Writing pgidx here! */ + fts5PutU16(&pPgidx->p[pPgidx->n], pBuf->n); + pPgidx->n += 2; if( writer.bFirstTermInPage==0 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre); nSuffix = nTerm - nPre; }else{ - fts5PutU16(&pBuf->p[2], pBuf->n); writer.bFirstTermInPage = 0; if( writer.writer.pgno!=1 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); @@ -3745,7 +3874,7 @@ static void fts5FlushOneHash(Fts5Index *p){ assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); writer.aDlidx[0].pgno = writer.writer.pgno; - if( pgsz>=(pBuf->n + nDoclist + 1) ){ + if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ @@ -3777,7 +3906,7 @@ static void fts5FlushOneHash(Fts5Index *p){ } assert( pBuf->n<=pBuf->nSpace ); - if( (pBuf->n + nCopy) <= pgsz ){ + if( (pBuf->n + pPgidx->n + nCopy) <= pgsz ){ /* The entire poslist will fit on the current leaf. So copy ** it in one go. */ fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); @@ -3788,7 +3917,7 @@ static void fts5FlushOneHash(Fts5Index *p){ const u8 *pPoslist = &pDoclist[iOff]; int iPos = 0; while( p->rc==SQLITE_OK ){ - int nSpace = pgsz - pBuf->n; + int nSpace = pgsz - pBuf->n - pPgidx->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; @@ -3798,9 +3927,8 @@ static void fts5FlushOneHash(Fts5Index *p){ assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; - if( pBuf->n>=pgsz ){ + if( (pBuf->n + pPgidx->n)>=pgsz ){ fts5WriteFlushLeaf(p, &writer); - pBuf = &writer.writer.buf; } if( iPos>=nCopy ) break; } @@ -4163,7 +4291,7 @@ static void fts5SetupPrefixIter( pData = fts5IdxMalloc(p, sizeof(Fts5Data) + doclist.n); if( pData ){ pData->p = (u8*)&pData[1]; - pData->n = doclist.n; + pData->nn = pData->szLeaf = doclist.n; memcpy(pData->p, doclist.p, doclist.n); fts5MultiIterNew2(p, pData, bDesc, ppIter); } @@ -4393,6 +4521,11 @@ int sqlite3Fts5IndexQuery( memcpy(&buf.p[1], pToken, nToken); #ifdef SQLITE_DEBUG + /* If the QUERY_TEST_NOIDX flag was specified, then this must be a + ** prefix-query. Instead of using a prefix-index (if one exists), + ** evaluate the prefix query using the main FTS index. This is used + ** for internal sanity checking by the integrity-check in debug + ** mode only. */ if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; @@ -4513,7 +4646,7 @@ int sqlite3Fts5IterPoslist( assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; *pn = pSeg->nPos; - if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ + if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->szLeaf ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ fts5BufferZero(&pIter->poslist); @@ -4561,11 +4694,11 @@ int sqlite3Fts5IndexGetAverages(Fts5Index *p, i64 *pnRow, i64 *anSize){ *pnRow = 0; memset(anSize, 0, sizeof(i64) * nCol); pData = fts5DataRead(p, FTS5_AVERAGES_ROWID); - if( p->rc==SQLITE_OK && pData->n ){ + if( p->rc==SQLITE_OK && pData->nn ){ int i = 0; int iCol; i += fts5GetVarint(&pData->p[i], (u64*)pnRow); - for(iCol=0; in && iColnn && iColp[i], (u64*)&anSize[iCol]); } } @@ -4770,18 +4903,25 @@ static void fts5TestTerm( if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; /* If this is a prefix query, check that the results returned if the - ** the index is disabled are the same. In both ASC and DESC order. */ - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; - } - if( iIdx>0 && rc==SQLITE_OK ){ - int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; - ck2 = 0; - rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); - if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + ** the index is disabled are the same. In both ASC and DESC order. + ** + ** This check may only be performed if the hash table is empty. This + ** is because the hash table only supports a single scan query at + ** a time, and the multi-iter loop from which this function is called + ** is already performing such a scan. */ + if( p->nPendingData==0 ){ + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } + if( iIdx>0 && rc==SQLITE_OK ){ + int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; + ck2 = 0; + rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); + if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; + } } cksum3 ^= ck1; @@ -4822,7 +4962,7 @@ static void fts5IndexIntegrityCheckEmpty( for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i)); if( pLeaf ){ - if( 0!=fts5GetU16(&pLeaf->p[2]) ) p->rc = FTS5_CORRUPT; + if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); @@ -4851,7 +4991,6 @@ static void fts5IndexIntegrityCheckSegment( while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ - int iOff; /* Offset of first term on leaf */ int nIdxTerm = sqlite3_column_bytes(pStmt, 1); const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1); @@ -4869,14 +5008,15 @@ static void fts5IndexIntegrityCheckSegment( ** to or larger than the split-key in zIdxTerm. Also check that if there ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. */ - iOff = fts5GetU16(&pLeaf->p[2]); - if( iOff==0 ){ + if( pLeaf->nn<=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ - int iRowidOff; + int iOff; /* Offset of first term on leaf */ + int iRowidOff; /* Offset of first rowid on leaf */ int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ + iOff = fts5LeafFirstTermOff(pLeaf); iRowidOff = fts5GetU16(&pLeaf->p[0]); if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; @@ -4929,7 +5069,8 @@ static void fts5IndexIntegrityCheckSegment( if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); - if( iRowidOff>=pLeaf->n ){ + ASSERT_SZLEAF_OK(pLeaf); + if( iRowidOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); @@ -5193,8 +5334,10 @@ static int fts5DecodeDoclist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ i64 iDocid; int iOff = 0; - iOff = sqlite3Fts5GetVarint(&a[iOff], (u64*)&iDocid); - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " rowid=%lld", iDocid); + if( n>0 ){ + iOff = sqlite3Fts5GetVarint(a, (u64*)&iDocid); + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " id=%lld", iDocid); + } while( iOff=4 ){ iRowidOff = fts5GetU16(&a[0]); - iTermOff = fts5GetU16(&a[2]); + szLeaf = fts5GetU16(&a[2]); + if( szLeaf Date: Mon, 7 Sep 2015 08:14:30 +0000 Subject: [PATCH 02/10] Use macros to make the code in fts5_index.c easier to read. FossilOrigin-Name: 67ff5ae81357eb7fa28049bb724a22cb6f52e076 --- ext/fts5/fts5_index.c | 25 +++++++++++++------------ manifest | 15 ++++++--------- manifest.uuid | 2 +- 3 files changed, 20 insertions(+), 22 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 7fa5a0fae4..87eb54b125 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -522,7 +522,8 @@ struct Fts5SegIter { */ #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) -#define fts5LeafFirstTermOff(x) (fts5GetU16(&x->p[(x)->szLeaf])) +#define fts5LeafFirstTermOff(x) (fts5GetU16(&(x)->p[(x)->szLeaf])) +#define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) /* ** poslist: @@ -1585,8 +1586,8 @@ static void fts5SegIterInit( if( p->rc==SQLITE_OK ){ u8 *a = pIter->pLeaf->p; - pIter->iLeafOffset = fts5GetU16(&a[pIter->pLeaf->szLeaf]); - assert( pIter->iLeafOffset==4 ); + pIter->iLeafOffset = 4; + assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } @@ -1787,12 +1788,12 @@ static void fts5SegIterNext( pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; ASSERT_SZLEAF_OK(pLeaf); - if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOffszLeaf ){ + if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } else if( pLeaf->nn>pLeaf->szLeaf ){ - iOff = fts5GetU16(&pLeaf->p[pLeaf->szLeaf]); + iOff = fts5LeafFirstTermOff(pLeaf); pIter->iLeafOffset = iOff; bNewTerm = 1; } @@ -1881,7 +1882,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ int iRowid, bTermless; - iRowid = fts5GetU16(pNew->p); + iRowid = fts5LeafFirstRowidOff(pNew); bTermless = fts5LeafIsTermless(pNew); if( iRowid ){ SWAPVAL(Fts5Data*, pNew, pLast); @@ -1999,7 +2000,7 @@ static void fts5LeafSeek( assert( p->rc==SQLITE_OK ); assert( pIter->pLeaf ); - iOff = fts5GetU16(&a[n]); + iOff = fts5LeafFirstTermOff(pIter->pLeaf); if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; return; @@ -2434,7 +2435,7 @@ static void fts5SegIterGotoPage( u8 *a = pIter->pLeaf->p; int n = pIter->pLeaf->szLeaf; - iOff = fts5GetU16(&a[0]); + iOff = fts5LeafFirstRowidOff(pIter->pLeaf); if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; }else{ @@ -4963,7 +4964,7 @@ static void fts5IndexIntegrityCheckEmpty( Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i)); if( pLeaf ){ if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; - if( i>=iNoRowid && 0!=fts5GetU16(&pLeaf->p[0]) ) p->rc = FTS5_CORRUPT; + if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); if( p->rc ) break; @@ -5017,7 +5018,7 @@ static void fts5IndexIntegrityCheckSegment( int res; /* Comparison of term and split-key */ iOff = fts5LeafFirstTermOff(pLeaf); - iRowidOff = fts5GetU16(&pLeaf->p[0]); + iRowidOff = fts5LeafFirstRowidOff(pLeaf); if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; }else{ @@ -5056,7 +5057,7 @@ static void fts5IndexIntegrityCheckSegment( iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ - if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; + if( fts5LeafFirstRowidOff(pLeaf)!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } @@ -5068,7 +5069,7 @@ static void fts5IndexIntegrityCheckSegment( pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; - int iRowidOff = fts5GetU16(&pLeaf->p[0]); + int iRowidOff = fts5LeafFirstRowidOff(pLeaf); ASSERT_SZLEAF_OK(pLeaf); if( iRowidOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; diff --git a/manifest b/manifest index 6a1f86e904..f47bf05938 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Experiment\swith\sa\sdifferent\sfts5\sleaf\spage\sformat\sthat\sallows\sfaster\sseeks. -D 2015-09-05T19:52:08.105 +C Use\smacros\sto\smake\sthe\scode\sin\sfts5_index.c\seasier\sto\sread. +D 2015-09-07T08:14:30.857 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015 F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c c34a64666c3b573aaed0fe103ce739ca2c0b88e5 +F ext/fts5/fts5_index.c 213e5aea27100a2ebb7a576d6574bcc28c594520 F ext/fts5/fts5_main.c e9d0892424bb7f0a8b58613d4ff75cb650cf286e F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 @@ -1384,10 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 24924a58197e558a9e8800cc5c91dc8fb32f3557 -R 00e6b769eaa54af1e95eef69109c890a -T *branch * fts5-incompatible -T *sym-fts5-incompatible * -T -sym-trunk * +P a1f4c3b543eed84e808f6b901a38179786fffe16 +R d9f9c87817152716ded47406f74ba235 U dan -Z 9f2973699597cf834c49e709dc04f160 +Z 4c8df884c6ae97a50d0e8662a6ca4b0e diff --git a/manifest.uuid b/manifest.uuid index 6229a84693..a4473a78af 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -a1f4c3b543eed84e808f6b901a38179786fffe16 \ No newline at end of file +67ff5ae81357eb7fa28049bb724a22cb6f52e076 \ No newline at end of file From fe8e2eba0a64aa1234fecc4efa5a9c3f7efed202 Mon Sep 17 00:00:00 2001 From: dan Date: Tue, 8 Sep 2015 19:55:26 +0000 Subject: [PATCH 03/10] Remove the 0x00 terminators from the end of doclists stored on disk. FossilOrigin-Name: 00d990061dec3661b0376bd167082942d5563bfe --- ext/fts5/fts5Int.h | 10 + ext/fts5/fts5_buffer.c | 6 +- ext/fts5/fts5_config.c | 3 + ext/fts5/fts5_index.c | 454 ++++++++++++++++++---------------- ext/fts5/fts5_main.c | 4 + ext/fts5/test/fts5aa.test | 5 - ext/fts5/test/fts5simple.test | 53 +++- main.mk | 4 +- manifest | 26 +- manifest.uuid | 2 +- 10 files changed, 323 insertions(+), 244 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index 837ecb1ccd..c52ce67d70 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -117,6 +117,12 @@ typedef struct Fts5Config Fts5Config; ** bColumnsize: ** True if the %_docsize table is created. ** +** bPrefixIndex: +** This is only used for debugging. If set to false, any prefix indexes +** are ignored. This value is configured using: +** +** INSERT INTO tbl(tbl, rank) VALUES('prefix-index', $bPrefixIndex); +** */ struct Fts5Config { sqlite3 *db; /* Database handle */ @@ -145,6 +151,10 @@ struct Fts5Config { /* If non-NULL, points to sqlite3_vtab.base.zErrmsg. Often NULL. */ char **pzErrmsg; + +#ifdef SQLITE_DEBUG + int bPrefixIndex; /* True to use prefix-indexes */ +#endif }; /* Current expected value of %_config table 'version' field */ diff --git a/ext/fts5/fts5_buffer.c b/ext/fts5/fts5_buffer.c index 07e1243c36..1a7c0d0f8a 100644 --- a/ext/fts5/fts5_buffer.c +++ b/ext/fts5/fts5_buffer.c @@ -16,12 +16,14 @@ #include "fts5Int.h" int sqlite3Fts5BufferGrow(int *pRc, Fts5Buffer *pBuf, int nByte){ - /* A no-op if an error has already occurred */ - if( *pRc ) return 1; if( (pBuf->n + nByte) > pBuf->nSpace ){ u8 *pNew; int nNew = pBuf->nSpace ? pBuf->nSpace*2 : 64; + + /* A no-op if an error has already occurred */ + if( *pRc ) return 1; + while( nNew<(pBuf->n + nByte) ){ nNew = nNew * 2; } diff --git a/ext/fts5/fts5_config.c b/ext/fts5/fts5_config.c index 74faf6dd30..19e3d3ab1f 100644 --- a/ext/fts5/fts5_config.c +++ b/ext/fts5/fts5_config.c @@ -480,6 +480,9 @@ int sqlite3Fts5ConfigParse( pRet->zDb = sqlite3Fts5Strndup(&rc, azArg[1], -1); pRet->zName = sqlite3Fts5Strndup(&rc, azArg[2], -1); pRet->bColumnsize = 1; +#ifdef SQLITE_DEBUG + pRet->bPrefixIndex = 1; +#endif if( rc==SQLITE_OK && sqlite3_stricmp(pRet->zName, FTS5_RANK_NAME)==0 ){ *pzErr = sqlite3_mprintf("reserved fts5 table name: %s", pRet->zName); rc = SQLITE_ERROR; diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 87eb54b125..9bc08e7443 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -475,6 +475,9 @@ struct Fts5CResult { ** For each rowid on the page corresponding to the current term, the ** corresponding aRowidOffset[] entry is set to the byte offset of the ** start of the "position-list-size" field within the page. +** +** iTermIdx: +** Index of current term on iTermLeafPgno. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ @@ -489,6 +492,9 @@ struct Fts5SegIter { int iTermLeafPgno; int iTermLeafOffset; + int iTermIdx; + int iEndofDoclist; + /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ int iRowidOffset; /* Current entry in aRowidOffset[] */ int nRowidOffset; /* Allocated size of aRowidOffset[] array */ @@ -508,7 +514,7 @@ struct Fts5SegIter { ** leaf page. */ #define ASSERT_SZLEAF_OK(x) assert( \ - (x)->szLeaf==fts5GetU16(&(x)->p[2]) || (x)->szLeaf==(x)->nn \ + (x)->szLeaf==(x)->nn || (x)->szLeaf==fts5GetU16(&(x)->p[2]) \ ) #define FTS5_SEGITER_ONETERM 0x01 @@ -522,9 +528,12 @@ struct Fts5SegIter { */ #define fts5LeafIsTermless(x) ((x)->szLeaf >= (x)->nn) -#define fts5LeafFirstTermOff(x) (fts5GetU16(&(x)->p[(x)->szLeaf])) +#define fts5LeafTermOff(x, i) (fts5GetU16(&(x)->p[(x)->szLeaf + (i)*2])) + #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) +#define fts5LeafFirstTermOff(x) fts5LeafTermOff(x, 0) + /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. @@ -1153,8 +1162,9 @@ static void fts5StructurePromote( int szPromote = 0; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ int szSeg; /* Size of segment just written */ + int nSeg = pStruct->aLevel[iLvl].nSeg; - + if( nSeg==0 ) return; pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); @@ -1464,6 +1474,14 @@ static void fts5SegIterNextPage( }else{ pIter->pLeaf = 0; } + + if( pIter->pLeaf ){ + if( fts5LeafIsTermless(pIter->pLeaf) ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; + }else{ + pIter->iEndofDoclist = fts5LeafFirstTermOff(pIter->pLeaf); + } + } } /* @@ -1505,6 +1523,20 @@ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ } } +static void fts5SegIterLoadEod(Fts5Index *p, Fts5SegIter *pIter){ + Fts5Data *pLeaf = pIter->pLeaf; + int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; + + assert( pIter->iLeafPgno==pIter->iTermLeafPgno ); + if( (pIter->iTermIdx+1)szLeaf + (pIter->iTermIdx + 1) * 2; + pIter->iEndofDoclist = fts5GetU16(&pLeaf->p[iRead]); + }else{ + pIter->iEndofDoclist = pLeaf->nn+1; + } + +} + static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; @@ -1551,6 +1583,7 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; + fts5SegIterLoadEod(p, pIter); fts5SegIterLoadRowid(p, pIter); } @@ -1585,9 +1618,10 @@ static void fts5SegIterInit( } if( p->rc==SQLITE_OK ){ - u8 *a = pIter->pLeaf->p; pIter->iLeafOffset = 4; + assert_nc( pIter->pLeaf->nn>4 ); assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); + pIter->iTermIdx = 0; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } @@ -1614,6 +1648,10 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; + if( n>pIter->iEndofDoclist ){ + n = pIter->iEndofDoclist; + } + ASSERT_SZLEAF_OK(pIter->pLeaf); while( 1 ){ i64 iDelta = 0; @@ -1624,7 +1662,6 @@ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ i += nPos; if( i>=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); - if( iDelta==0 ) break; pIter->iRowid += iDelta; if( iRowidOffset>=pIter->nRowidOffset ){ @@ -1667,8 +1704,8 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ pIter->iLeafOffset = pIter->iTermLeafOffset; } }else{ - int iRowidOff, dummy; - fts5LeafHeader(pNew, &iRowidOff, &dummy); + int iRowidOff; + iRowidOff = fts5LeafFirstRowidOff(pNew); if( iRowidOff ){ pIter->pLeaf = pNew; pIter->iLeafOffset = iRowidOff; @@ -1686,6 +1723,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ } if( pIter->pLeaf ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; fts5SegIterReverseInitPage(p, pIter); } } @@ -1747,21 +1785,26 @@ static void fts5SegIterNext( iOff = pIter->iLeafOffset + pIter->nPos; if( iOffiLeafOffset = iOff; - if( iDelta==0 ){ + /* The next entry is on the current page. */ + assert_nc( iOff<=pIter->iEndofDoclist ); + if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; - if( iOff>=n ){ - fts5SegIterNextPage(p, pIter); - pIter->iLeafOffset = 4; - }else if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ - pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); + if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ + pIter->iTermIdx++; + }else{ + pIter->iTermIdx = 0; + } + if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ + iOff += fts5GetVarint32(&a[iOff], nKeep); } }else{ + u64 iDelta; + iOff += sqlite3Fts5GetVarint(&a[iOff], &iDelta); pIter->iRowid += iDelta; + assert_nc( iDelta>0 ); } + pIter->iLeafOffset = iOff; + }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm = 0; @@ -1777,6 +1820,7 @@ static void fts5SegIterNext( pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->nn = nList; pIter->pLeaf->szLeaf = nList; + pIter->iEndofDoclist = nList+1; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } @@ -1796,6 +1840,7 @@ static void fts5SegIterNext( iOff = fts5LeafFirstTermOff(pLeaf); pIter->iLeafOffset = iOff; bNewTerm = 1; + pIter->iTermIdx = 0; } if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; @@ -1847,31 +1892,11 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ ** byte of position-list content for the current rowid. Back it up ** so that it points to the start of the position-list size field. */ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); - iOff = pIter->iLeafOffset; - assert( iOff>=4 ); - - /* Search for a new term within the current leaf. If one can be found, - ** then this page contains the largest rowid for the current term. */ - while( iOffszLeaf ){ - int nPos; - i64 iDelta; - int bDummy; - - /* Read the position-list size field */ - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - if( iOff>=pLeaf->szLeaf ) break; - - /* Rowid delta. Or, if 0x00, the end of doclist marker. */ - nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) break; - iOff += nPos; - } /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ - if( iOff>=pLeaf->szLeaf ){ + if( pIter->iEndofDoclist>=pLeaf->szLeaf ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; @@ -1905,14 +1930,20 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ ** first rowid on this page. */ if( pLast ){ - int dummy; int iOff; fts5DataRelease(pIter->pLeaf); pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; - fts5LeafHeader(pLast, &iOff, &dummy); + iOff = fts5LeafFirstRowidOff(pLast); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + + if( fts5LeafIsTermless(pLast) ){ + pIter->iEndofDoclist = pLast->nn+1; + }else{ + pIter->iEndofDoclist = fts5LeafTermOff(pLast, 0); + } + } fts5SegIterReverseInitPage(p, pIter); @@ -1935,30 +1966,20 @@ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ - if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ - int iOff = pIter->iLeafOffset + pIter->nPos; - while( iOffszLeaf ){ - int bDummy; - int nPos; - i64 iDelta; - - /* iOff is currently the offset of the start of position list data */ - iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); - if( iDelta==0 ) return; - assert_nc( iOffszLeaf ); - iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); - iOff += nPos; - } + if( pIter->iTermLeafPgno==pIter->iLeafPgno + && pIter->iEndofDoclistszLeaf + ){ + return; } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } #define fts5IndexGetVarint32(a, iOff, nVal) { \ - nVal = a[iOff++]; \ + nVal = (a)[iOff++]; \ if( nVal & 0x80 ){ \ iOff--; \ - iOff += fts5GetVarint32(&a[iOff], nVal); \ + iOff += fts5GetVarint32(&(a)[iOff], nVal); \ } \ } @@ -2037,41 +2058,12 @@ static void fts5LeafSeek( } iOff += nNew; -#if 0 - /* Skip past the doclist. If the end of the page is reached, bail out. */ - while( 1 ){ - int nPos; - - /* Skip past rowid delta */ - fts5IndexSkipVarint(a, iOff); - - /* Skip past position list */ - fts5IndexGetVarint32(a, iOff, nPos); - iOff += (nPos >> 1); - if( iOff>=(n-1) ){ - iOff = n; - goto search_failed; - } - - /* If this is the end of the doclist, break out of the loop */ - if( a[iOff]==0x00 ){ - iOff++; - break; - } - }; - - iTerm++; - assert( iTerm=nPgTerm ){ iOff = n; break; } iOff = fts5GetU16(&a[n + iTerm*2]); -#endif /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); @@ -2084,6 +2076,7 @@ static void fts5LeafSeek( return; }else if( iOff>=n ){ do { + iTerm = 0; fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; @@ -2101,6 +2094,8 @@ static void fts5LeafSeek( } search_success: + pIter->iTermIdx = iTerm; + pIter->iLeafOffset = iOff + nNew; pIter->iTermLeafOffset = pIter->iLeafOffset; pIter->iTermLeafPgno = pIter->iLeafPgno; @@ -2108,6 +2103,7 @@ static void fts5LeafSeek( fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); + fts5SegIterLoadEod(p, pIter); fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } @@ -2243,6 +2239,7 @@ static void fts5SegIterHashInit( pLeaf->nn = pLeaf->szLeaf = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); + pIter->iEndofDoclist = pLeaf->nn+1; if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; @@ -2770,6 +2767,7 @@ static void fts5MultiIterNew2( if( pData->szLeaf>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); + pIter->iEndofDoclist = pData->nn; pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; @@ -3201,25 +3199,23 @@ static void fts5WriteAppendTerm( Fts5PageWriter *pPage = &pWriter->writer; Fts5Buffer *pPgidx = &pWriter->writer.pgidx; - assert( pPage->buf.n==0 || pPage->buf.n>4 ); - if( pPage->buf.n==0 ){ - /* Zero the first term and first rowid fields */ - static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; - fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); - assert( pWriter->bFirstTermInPage ); - } if( p->rc ) return; + assert( pPage->buf.n>=4 ); + assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); + + /* If the current leaf page is full, flush it to disk. */ + if( (pPage->buf.n + pPage->pgidx.n + nTerm + 2)>=p->pConfig->pgsz ){ + if( pPage->buf.n>4 ){ + fts5WriteFlushLeaf(p, pWriter); + } + fts5BufferGrow(&p->rc, &pPage->buf, nTerm+FTS5_DATA_PADDING); + } - /* TODO1: Can this be consolidated with FlushOneHash version? */ + /* TODO1: Updating pgidx here. */ fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); pPgidx->n += 2; if( pWriter->bFirstTermInPage ){ - /* Update the "first term" field of the page header. */ -#if 0 - assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); - fts5PutU16(&pPage->buf.p[2], pPage->buf.n); -#endif nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in @@ -3261,11 +3257,6 @@ static void fts5WriteAppendTerm( assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); pWriter->aDlidx[0].pgno = pPage->pgno; - - /* If the current leaf page is full, flush it to disk. */ - if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } } /* @@ -3280,6 +3271,10 @@ static void fts5WriteAppendRowid( if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->writer; + if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ + fts5WriteFlushLeaf(p, pWriter); + } + /* If this is to be the first rowid written to the page, set the ** rowid-pointer in the page-header. Also append a value to the dlidx ** buffer, in case a doclist-index is required. */ @@ -3300,10 +3295,6 @@ static void fts5WriteAppendRowid( pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); - - if( (pPage->buf.n + pPage->pgidx.n)>=p->pConfig->pgsz ){ - fts5WriteFlushLeaf(p, pWriter); - } } } @@ -3383,6 +3374,8 @@ static void fts5WriteInit( Fts5SegWriter *pWriter, int iSegid ){ + const int nBuffer = p->pConfig->pgsz + FTS5_DATA_PADDING; + memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; @@ -3391,7 +3384,10 @@ static void fts5WriteInit( pWriter->bFirstTermInPage = 1; pWriter->iBtPage = 1; - fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, p->pConfig->pgsz + 20); + /* Grow the two buffers to pgsz + padding bytes in size. */ + fts5BufferGrow(&p->rc, &pWriter->writer.pgidx, nBuffer); + fts5BufferGrow(&p->rc, &pWriter->writer.buf, nBuffer); + if( p->pIdxWriter==0 ){ Fts5Config *pConfig = p->pConfig; fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf( @@ -3401,6 +3397,13 @@ static void fts5WriteInit( } if( p->rc==SQLITE_OK ){ + /* Initialize the 4-byte leaf-page header to 0x00. */ + memset(pWriter->writer.buf.p, 0, 4); + pWriter->writer.buf.n = 4; + + /* Bind the current output segment id to the index-writer. This is an + ** optimization over binding the same value over and over as rows are + ** inserted into %_idx by the current writer. */ sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); } } @@ -3414,38 +3417,39 @@ static void fts5WriteInit( ** This function appends a page-index to the buffer. The buffer is ** guaranteed to be large enough to fit the page-index. */ -static void fts5MakePageidx(Fts5Index *p, Fts5Buffer *pBuf){ +static void fts5MakePageidx( + Fts5Index *p, /* Fts index object to store any error in */ + Fts5Data *pOld, /* Original page data */ + int iOldidx, /* Index of term in pOld pgidx */ + Fts5Buffer *pBuf /* Buffer containing new page (no pgidx) */ +){ if( p->rc==SQLITE_OK ){ - u8 *a = pBuf->p; - int szLeaf = pBuf->n; - int iOff = 4; - int nTerm; + int iOff; + int iEnd; + int nByte; + int iEndOld; /* Byte after term iOldIdx on old page */ + int iEndNew; /* Byte after term iOldIdx on new page */ + int ii; + int nPgIdx = (pOld->nn - pOld->szLeaf) / 2; - fts5PutU16(&pBuf->p[pBuf->n], iOff); + /* Determine end of term on old page */ + iEndOld = fts5LeafTermOff(pOld, iOldidx); + if( iOldidx>0 ){ + iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte); + } + iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte); + iEndOld += nByte; + + /* Determine end of term on new page */ + iEndNew = 4 + fts5GetVarint32(&pBuf->p[4], nByte); + iEndNew += nByte; + + fts5PutU16(&pBuf->p[pBuf->n], 4); pBuf->n += 2; - fts5IndexGetVarint32(a, iOff, nTerm); - iOff += nTerm; - - while( iOff=szLeaf ) break; - - /* Skip past position list */ - fts5IndexGetVarint32(a, iOff, nTerm); - iOff += (nTerm >> 1); - - if( iOff>=(szLeaf-2) ) break; - - /* If this is the end of the doclist, break out of the loop */ - if( a[iOff]==0x00 ){ - iOff++; - fts5PutU16(&pBuf->p[pBuf->n], iOff); - pBuf->n += 2; - fts5IndexGetVarint32(a, iOff, nTerm); - fts5IndexGetVarint32(a, iOff, nTerm); - iOff += nTerm; - } + for(ii=iOldidx+1; iip[pBuf->n], iVal + (iEndNew - iEndOld)); + pBuf->n += 2; } } } @@ -3491,7 +3495,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ } /* Set up the new page-index array */ - fts5MakePageidx(p, &buf); + fts5MakePageidx(p, pData, pSeg->iTermIdx, &buf); fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; @@ -3595,8 +3599,9 @@ static void fts5IndexMergeLevel( } /* This is a new term. Append a term to the output segment. */ + /* TODO2: Doclist 0x00 term */ if( bRequireDoclistTerm ){ - fts5WriteAppendZerobyte(p, &writer); + /* fts5WriteAppendZerobyte(p, &writer); */ } fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); @@ -3739,6 +3744,7 @@ static void fts5IndexCrisismerge( assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, &pStruct, iLvl, 0); + assert( p->rc!=SQLITE_OK || pStruct->nLevel>(iLvl+1) ); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } @@ -3766,10 +3772,12 @@ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ int ret; u32 dummy; ret = fts5GetVarint32(aBuf, dummy); - while( 1 ){ - int i = fts5GetVarint32(&aBuf[ret], dummy); - if( (ret + i) > nMax ) break; - ret += i; + if( ret nMax ) break; + ret += i; + } } return ret; } @@ -3810,72 +3818,32 @@ static void fts5FlushOneHash(Fts5Index *p){ Fts5SegWriter writer; fts5WriteInit(p, &writer, iSegid); - /* Pre-allocate the buffer used to assemble leaf pages to the target - ** page size. */ - assert( pgsz>0 ); pBuf = &writer.writer.buf; pPgidx = &writer.writer.pgidx; - fts5BufferGrow(&p->rc, pBuf, pgsz + 20); + + /* fts5WriteInit() should have initialized the buffers to (most likely) + ** the maximum space required. */ + assert( p->rc || pBuf->nSpace>=(pgsz + FTS5_DATA_PADDING) ); + assert( p->rc || pPgidx->nSpace>=(pgsz + FTS5_DATA_PADDING) ); /* Begin scanning through hash table entries. This loop runs once for each ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ - memset(pBuf->p, 0, 4); - pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; /* Buffer containing term */ - int nTerm; /* Size of zTerm in bytes */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ + /* Write the term for this entry to disk. */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); - nTerm = strlen(zTerm); + fts5WriteAppendTerm(p, &writer, strlen(zTerm), zTerm); - /* Decide if the term will fit on the current leaf. If it will not, - ** flush the leaf to disk here. - ** TODO1: Is this calculation still correct? */ - if( pBuf->n>4 && (pBuf->n + nTerm + 2 + pPgidx->n + 2) > pgsz ){ - fts5WriteFlushLeaf(p, &writer); - if( (nTerm + 32) > pBuf->nSpace ){ - fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); - if( p->rc ) break; - } - } - - /* Write the term to the leaf. And if it is the first on the leaf, and - ** the leaf is not page number 1, push it up into the b-tree hierarchy - ** as well. */ - - /* TODO1: Writing pgidx here! */ - fts5PutU16(&pPgidx->p[pPgidx->n], pBuf->n); - pPgidx->n += 2; - if( writer.bFirstTermInPage==0 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre); - nSuffix = nTerm - nPre; - }else{ - writer.bFirstTermInPage = 0; - if( writer.writer.pgno!=1 ){ - int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); - fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); - pBuf = &writer.writer.buf; - assert( nPren += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix); - fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); - - /* We just wrote a term into page writer.aWriter[0].pgno. If a - ** doclist-index is to be generated for this doclist, it will be - ** associated with this page. */ - assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); - writer.aDlidx[0].pgno = writer.writer.pgno; - - if( pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) ){ + if( writer.bFirstRowidInPage==0 + && pgsz>=(pBuf->n + pPgidx->n + nDoclist + 1) + ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ @@ -3883,7 +3851,7 @@ static void fts5FlushOneHash(Fts5Index *p){ i64 iDelta = 0; int iOff = 0; - writer.bFirstRowidInPage = 0; + /* writer.bFirstRowidInPage = 0; */ /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current @@ -3938,7 +3906,8 @@ static void fts5FlushOneHash(Fts5Index *p){ } } - pBuf->p[pBuf->n++] = '\0'; + /* TODO2: Doclist terminator written here. */ + /* pBuf->p[pBuf->n++] = '\0'; */ assert( pBuf->n<=pBuf->nSpace ); zPrev = (const u8*)zTerm; sqlite3Fts5HashScanNext(pHash); @@ -4527,7 +4496,7 @@ int sqlite3Fts5IndexQuery( ** evaluate the prefix query using the main FTS index. This is used ** for internal sanity checking by the integrity-check in debug ** mode only. */ - if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ + if( pConfig->bPrefixIndex==0 || (flags & FTS5INDEX_QUERY_TEST_NOIDX) ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else @@ -4971,6 +4940,48 @@ static void fts5IndexIntegrityCheckEmpty( } } +static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ + int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; + int ii; + Fts5Buffer buf1 = {0,0,0}; + Fts5Buffer buf2 = {0,0,0}; + + for(ii=0; p->rc==SQLITE_OK && ii=pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else if( ii==0 ){ + int nByte; + iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); + if( (iOff+nByte)>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else{ + fts5BufferSet(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); + } + }else{ + int nKeep, nByte; + iOff += fts5GetVarint32(&pLeaf->p[iOff], nKeep); + iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); + if( nKeep>buf1.n || (iOff+nByte)>pLeaf->szLeaf ){ + p->rc = FTS5_CORRUPT; + }else{ + buf1.n = nKeep; + fts5BufferAppendBlob(&p->rc, &buf1, nByte, &pLeaf->p[iOff]); + } + + if( p->rc==SQLITE_OK ){ + res = fts5BufferCompare(&buf1, &buf2); + if( res<=0 ) p->rc = FTS5_CORRUPT; + } + } + fts5BufferSet(&p->rc, &buf2, buf1.n, buf1.p); + } + + fts5BufferFree(&buf1); + fts5BufferFree(&buf2); +} + static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ @@ -5027,6 +5038,8 @@ static void fts5IndexIntegrityCheckSegment( if( res==0 ) res = nTerm - nIdxTerm; if( res<0 ) p->rc = FTS5_CORRUPT; } + + fts5IntegrityCheckPgidx(p, pLeaf); } fts5DataRelease(pLeaf); if( p->rc ) break; @@ -5418,51 +5431,56 @@ static void fts5DecodeFunction( int szLeaf = 0; int iRowidOff = 0; int iOff; - int nKeep = 0; + int nPgTerm = 0; + int nDoclist; + int i; memset(&term, 0, sizeof(Fts5Buffer)); - if( n>=4 ){ - iRowidOff = fts5GetU16(&a[0]); - szLeaf = fts5GetU16(&a[2]); - if( szLeaf0 ){ + iOff += fts5GetVarint32(&a[iOff], nByte); + term.n = nByte; + } iOff += fts5GetVarint32(&a[iOff], nByte); - term.n= nKeep; fts5BufferAppendBlob(&rc, &term, nByte, &a[iOff]); iOff += nByte; - sqlite3Fts5BufferAppendPrintf( &rc, &s, " term=%.*s", term.n, (const char*)term.p ); - iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], szLeaf-iOff); - if( iOffpStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); +#ifdef SQLITE_TEST + }else if( 0==sqlite3_stricmp("prefix-index", z) ){ + pConfig->bPrefixIndex = sqlite3_value_int(pVal); +#endif }else{ rc = sqlite3Fts5IndexLoadConfig(pTab->pIndex); if( rc==SQLITE_OK ){ diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index e20893dcf7..ae445f740d 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -203,11 +203,6 @@ for {set i 1} {$i <= 10} {incr i} { if {[set_test_counter errors]} break } -#db eval { SELECT fts5_decode(rowid, block) as x FROM t1_data } { puts $x } -#puts [db eval {SELECT rowid FROM t1 WHERE t1 MATCH 'aaa' ORDER BY rowid ASC}] -#puts [db eval {SELECT rowid FROM t1 WHERE t1 MATCH 'aaa' ORDER BY rowid DESC}] -#exit - #------------------------------------------------------------------------- # reset_db diff --git a/ext/fts5/test/fts5simple.test b/ext/fts5/test/fts5simple.test index c146090f3f..eeac11b5ca 100644 --- a/ext/fts5/test/fts5simple.test +++ b/ext/fts5/test/fts5simple.test @@ -11,7 +11,7 @@ # source [file join [file dirname [info script]] fts5_common.tcl] -set testprefix fts5aa +set testprefix fts5simple # If SQLITE_ENABLE_FTS5 is defined, omit this file. ifcapable !fts5 { @@ -50,7 +50,6 @@ do_execsql_test 2.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} -} #------------------------------------------------------------------------- # @@ -63,12 +62,58 @@ do_execsql_test 3.0 { SELECT * FROM t1 WHERE t1 MATCH 'o*'; } {one} -breakpoint do_execsql_test 3.1 { INSERT INTO t1(t1) VALUES('integrity-check'); } {} -# db eval { SELECT fts5_decode(rowid, block) as x FROM t1_data } { puts $x } +} + +#------------------------------------------------------------------------- +reset_db +do_execsql_test 4.1 { + CREATE VIRTUAL TABLE t11 USING fts5(content); + INSERT INTO t11(t11, rank) VALUES('pgsz', 32); + INSERT INTO t11 VALUES('another'); + INSERT INTO t11 VALUES('string'); + INSERT INTO t11 VALUES('of'); + INSERT INTO t11 VALUES('text'); +} +do_test 4.2 { + execsql { INSERT INTO t11(t11) VALUES('optimize') } +} {} +do_execsql_test 4.3 { + INSERT INTO t11(t11) VALUES('integrity-check'); +} {} + +#db eval { SELECT fts5_decode(rowid, block) as x FROM t11_data } { puts $x } + +#------------------------------------------------------------------------- +reset_db +set doc [string repeat "x y " 5] +do_execsql_test 5.1 { + CREATE VIRTUAL TABLE yy USING fts5(content); + INSERT INTO yy(yy, rank) VALUES('pgsz', 32); + BEGIN; + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + INSERT INTO yy VALUES($doc); + COMMIT; +} + +do_execsql_test 5.2 { + SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid ASC +} {1 2 3 4 5 6 7 8} + +do_execsql_test 5.3 { + SELECT rowid FROM yy WHERE yy MATCH 'y' ORDER BY rowid DESC +} {8 7 6 5 4 3 2 1} + +#db eval { SELECT fts5_decode(rowid, block) as x FROM yy_data } { puts $x } finish_test diff --git a/main.mk b/main.mk index d07e473be6..a10199f3d1 100644 --- a/main.mk +++ b/main.mk @@ -332,7 +332,9 @@ TESTSRC += \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/fts5/fts5_test_mi.c \ - fts5.c + $(FTS5_SRC) + +# fts5.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c diff --git a/manifest b/manifest index f47bf05938..36d5700540 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Use\smacros\sto\smake\sthe\scode\sin\sfts5_index.c\seasier\sto\sread. -D 2015-09-07T08:14:30.857 +C Remove\sthe\s0x00\sterminators\sfrom\sthe\send\sof\sdoclists\sstored\son\sdisk. +D 2015-09-08T19:55:26.385 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,14 +106,14 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h f04659e0df5af83731b102189a32280f74f4a6bc -F ext/fts5/fts5Int.h f65d41f66accad0a289d6bd66b13c07d2932f9be +F ext/fts5/fts5Int.h 7e6002582133cb795a21468732cf7c35027a28e4 F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e -F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015 -F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384 +F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 +F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c 213e5aea27100a2ebb7a576d6574bcc28c594520 -F ext/fts5/fts5_main.c e9d0892424bb7f0a8b58613d4ff75cb650cf286e +F ext/fts5/fts5_index.c 677c859b2064e00903a9ad847a1cfca8d36ce595 +F ext/fts5/fts5_main.c 3ec19f23693e034028afb9b4fa7c800dc3eda5ab F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf @@ -124,7 +124,7 @@ F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc -F ext/fts5/test/fts5aa.test 1ac5a3bd88406183b00ea7cb4701bd58e5c3c7ff +F ext/fts5/test/fts5aa.test fb84c53e90dc4f9b4dc485858b53e70b67d6f064 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c F ext/fts5/test/fts5ad.test e3dfb150fce971b4fd832498c29f56924d451b63 @@ -173,7 +173,7 @@ F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1 F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460 -F ext/fts5/test/fts5simple.test f520b360c40a5a61aabebead53e1e5f68683e5a3 +F ext/fts5/test/fts5simple.test 40b76f77d56524a882473664c792bd4e8f2f2a0a F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671 F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89 F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 @@ -261,7 +261,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 61821e43596648bfacce2d6283377bee35986131 +F main.mk e67b73755e1f28c2d18d7953ca75c901c0be617b F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P a1f4c3b543eed84e808f6b901a38179786fffe16 -R d9f9c87817152716ded47406f74ba235 +P 67ff5ae81357eb7fa28049bb724a22cb6f52e076 +R 7e6723f8162dfa594f88012bb5737a33 U dan -Z 4c8df884c6ae97a50d0e8662a6ca4b0e +Z 0d6a020581263053b4291f892521bd3c diff --git a/manifest.uuid b/manifest.uuid index a4473a78af..55934271fe 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -67ff5ae81357eb7fa28049bb724a22cb6f52e076 \ No newline at end of file +00d990061dec3661b0376bd167082942d5563bfe \ No newline at end of file From f06bd2c132ac81e42e207160d5998941378ed2bf Mon Sep 17 00:00:00 2001 From: dan Date: Wed, 9 Sep 2015 08:15:06 +0000 Subject: [PATCH 04/10] Fix a bug in preprocessor macros within fts5_main.c. FossilOrigin-Name: 0eb2b9521fad6fa36e6fa374c2bc1f70b5180f7c --- ext/fts5/fts5_main.c | 2 +- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ext/fts5/fts5_main.c b/ext/fts5/fts5_main.c index 0755dcb291..2fd633bd28 100644 --- a/ext/fts5/fts5_main.c +++ b/ext/fts5/fts5_main.c @@ -1317,7 +1317,7 @@ static int fts5SpecialInsert( rc = sqlite3Fts5StorageMerge(pTab->pStorage, nMerge); }else if( 0==sqlite3_stricmp("integrity-check", z) ){ rc = sqlite3Fts5StorageIntegrity(pTab->pStorage); -#ifdef SQLITE_TEST +#ifdef SQLITE_DEBUG }else if( 0==sqlite3_stricmp("prefix-index", z) ){ pConfig->bPrefixIndex = sqlite3_value_int(pVal); #endif diff --git a/manifest b/manifest index 36d5700540..3a493d4cfa 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Remove\sthe\s0x00\sterminators\sfrom\sthe\send\sof\sdoclists\sstored\son\sdisk. -D 2015-09-08T19:55:26.385 +C Fix\sa\sbug\sin\spreprocessor\smacros\swithin\sfts5_main.c. +D 2015-09-09T08:15:06.914 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -113,7 +113,7 @@ F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 F ext/fts5/fts5_index.c 677c859b2064e00903a9ad847a1cfca8d36ce595 -F ext/fts5/fts5_main.c 3ec19f23693e034028afb9b4fa7c800dc3eda5ab +F ext/fts5/fts5_main.c 4b04c934084ea24a858438a04b5be8af3a9e0311 F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 F ext/fts5/fts5_test_mi.c e96be827aa8f571031e65e481251dc1981d608bf @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 67ff5ae81357eb7fa28049bb724a22cb6f52e076 -R 7e6723f8162dfa594f88012bb5737a33 +P 00d990061dec3661b0376bd167082942d5563bfe +R 9536c4653d5903bd9893d3d3b5a1950d U dan -Z 0d6a020581263053b4291f892521bd3c +Z 248c3a89f7d69902ace7cabd0e5d1724 diff --git a/manifest.uuid b/manifest.uuid index 55934271fe..355df4f8f3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -00d990061dec3661b0376bd167082942d5563bfe \ No newline at end of file +0eb2b9521fad6fa36e6fa374c2bc1f70b5180f7c \ No newline at end of file From a5aa8e1db2e2eef9e00e67798fc67dfedef3cfa8 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Sep 2015 05:40:17 +0000 Subject: [PATCH 05/10] Change the array of 16-bit offsets at the end of each page to an array of varints. FossilOrigin-Name: fab245bea4f283714c17bca22428d5eb4db5935a --- ext/fts5/fts5_index.c | 222 +++++++++++++++++----------------- ext/fts5/test/fts5simple.test | 58 ++++++++- ext/fts5/tool/loadfts5.tcl | 17 ++- manifest | 16 +-- manifest.uuid | 2 +- 5 files changed, 191 insertions(+), 124 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 9bc08e7443..a7e654d745 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -378,6 +378,7 @@ struct Fts5Structure { */ struct Fts5PageWriter { int pgno; /* Page number for this page */ + int iPrevPgidx; /* Previous value written into pgidx */ Fts5Buffer buf; /* Buffer containing leaf data */ Fts5Buffer pgidx; /* Buffer containing page-index */ Fts5Buffer term; /* Buffer containing previous term on page */ @@ -492,7 +493,7 @@ struct Fts5SegIter { int iTermLeafPgno; int iTermLeafOffset; - int iTermIdx; + int iPgidxOff; /* Next offset in pgidx */ int iEndofDoclist; /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ @@ -532,8 +533,6 @@ struct Fts5SegIter { #define fts5LeafFirstRowidOff(x) (fts5GetU16((x)->p)) -#define fts5LeafFirstTermOff(x) fts5LeafTermOff(x, 0) - /* ** poslist: ** Used by sqlite3Fts5IterPoslist() when the poslist needs to be buffered. @@ -648,6 +647,11 @@ static int fts5BlobCompare( } #endif +static int fts5LeafFirstTermOff(Fts5Data *pLeaf){ + int ret; + fts5GetVarint32(&pLeaf->p[pLeaf->szLeaf], ret); + return ret; +} /* ** Close the read-only blob handle, if it is open. @@ -1523,20 +1527,6 @@ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ } } -static void fts5SegIterLoadEod(Fts5Index *p, Fts5SegIter *pIter){ - Fts5Data *pLeaf = pIter->pLeaf; - int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; - - assert( pIter->iLeafPgno==pIter->iTermLeafPgno ); - if( (pIter->iTermIdx+1)szLeaf + (pIter->iTermIdx + 1) * 2; - pIter->iEndofDoclist = fts5GetU16(&pLeaf->p[iRead]); - }else{ - pIter->iEndofDoclist = pLeaf->nn+1; - } - -} - static void fts5SegIterLoadRowid(Fts5Index *p, Fts5SegIter *pIter){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; @@ -1583,7 +1573,14 @@ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ pIter->iTermLeafPgno = pIter->iLeafPgno; pIter->iLeafOffset = iOff; - fts5SegIterLoadEod(p, pIter); + if( pIter->iPgidxOff>=pIter->pLeaf->nn ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; + }else{ + int nExtra; + pIter->iPgidxOff += fts5GetVarint32(&a[pIter->iPgidxOff], nExtra); + pIter->iEndofDoclist += nExtra; + } + fts5SegIterLoadRowid(p, pIter); } @@ -1621,7 +1618,7 @@ static void fts5SegIterInit( pIter->iLeafOffset = 4; assert_nc( pIter->pLeaf->nn>4 ); assert( fts5LeafFirstTermOff(pIter->pLeaf)==4 ); - pIter->iTermIdx = 0; + pIter->iPgidxOff = pIter->pLeaf->szLeaf+1; fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } @@ -1789,11 +1786,6 @@ static void fts5SegIterNext( assert_nc( iOff<=pIter->iEndofDoclist ); if( iOff>=pIter->iEndofDoclist ){ bNewTerm = 1; - if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ - pIter->iTermIdx++; - }else{ - pIter->iTermIdx = 0; - } if( iOff!=fts5LeafFirstTermOff(pLeaf) ){ iOff += fts5GetVarint32(&a[iOff], nKeep); } @@ -1835,12 +1827,21 @@ static void fts5SegIterNext( if( (iOff = fts5LeafFirstRowidOff(pLeaf)) && iOffszLeaf ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; + + if( pLeaf->nn>pLeaf->szLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], pIter->iEndofDoclist + ); + } + } else if( pLeaf->nn>pLeaf->szLeaf ){ - iOff = fts5LeafFirstTermOff(pLeaf); + pIter->iPgidxOff = pLeaf->szLeaf + fts5GetVarint32( + &pLeaf->p[pLeaf->szLeaf], iOff + ); pIter->iLeafOffset = iOff; + pIter->iEndofDoclist = iOff; bNewTerm = 1; - pIter->iTermIdx = 0; } if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; @@ -1856,6 +1857,7 @@ static void fts5SegIterNext( fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ + int nExtra; fts5SegIterLoadTerm(p, pIter, nKeep); fts5SegIterLoadNPos(p, pIter); if( pbNewTerm ) *pbNewTerm = 1; @@ -1941,7 +1943,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ if( fts5LeafIsTermless(pLast) ){ pIter->iEndofDoclist = pLast->nn+1; }else{ - pIter->iEndofDoclist = fts5LeafTermOff(pLast, 0); + pIter->iEndofDoclist = fts5LeafFirstTermOff(pLast); } } @@ -2010,36 +2012,35 @@ static void fts5LeafSeek( ){ int iOff; const u8 *a = pIter->pLeaf->p; - int n = pIter->pLeaf->szLeaf; + int szLeaf = pIter->pLeaf->szLeaf; + int n = pIter->pLeaf->nn; int nMatch = 0; int nKeep = 0; int nNew = 0; int iTerm = 0; - int nPgTerm = (pIter->pLeaf->nn - pIter->pLeaf->szLeaf) >> 1; + int iTermOff; + int iPgidx; /* Current offset in pgidx */ + int bEndOfPage = 0; assert( p->rc==SQLITE_OK ); - assert( pIter->pLeaf ); - iOff = fts5LeafFirstTermOff(pIter->pLeaf); - if( iOff<4 || iOff>=n ){ - p->rc = FTS5_CORRUPT; - return; - } + iPgidx = szLeaf; + iPgidx += fts5GetVarint32(&a[iPgidx], iTermOff); + iOff = iTermOff; while( 1 ){ - int i; - int nCmp; /* Figure out how many new bytes are in this term */ fts5IndexGetVarint32(a, iOff, nNew); - if( nKeep=nMatch ); if( nKeep==nMatch ){ + int nCmp; + int i; nCmp = MIN(nNew, nTerm-nMatch); for(i=0; i=nPgTerm ){ - iOff = n; + if( iPgidx>=n ){ + bEndOfPage = 1; break; } - iOff = fts5GetU16(&a[n + iTerm*2]); + + iPgidx += fts5GetVarint32(&a[iPgidx], nKeep); + iTermOff += nKeep; + iOff = iTermOff; /* Read the nKeep field of the next term. */ fts5IndexGetVarint32(a, iOff, nKeep); @@ -2074,14 +2076,14 @@ static void fts5LeafSeek( fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; return; - }else if( iOff>=n ){ + }else if( bEndOfPage ){ do { iTerm = 0; fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ) return; a = pIter->pLeaf->p; if( fts5LeafIsTermless(pIter->pLeaf)==0 ){ - iOff = fts5LeafFirstTermOff(pIter->pLeaf); + fts5GetVarint32(&pIter->pLeaf->p[pIter->pLeaf->szLeaf], iOff); if( iOff<4 || iOff>=pIter->pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; }else{ @@ -2094,7 +2096,6 @@ static void fts5LeafSeek( } search_success: - pIter->iTermIdx = iTerm; pIter->iLeafOffset = iOff + nNew; pIter->iTermLeafOffset = pIter->iLeafOffset; @@ -2103,7 +2104,15 @@ static void fts5LeafSeek( fts5BufferSet(&p->rc, &pIter->term, nKeep, pTerm); fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); - fts5SegIterLoadEod(p, pIter); + if( iPgidx>=n ){ + pIter->iEndofDoclist = pIter->pLeaf->nn+1; + }else{ + int nExtra; + iPgidx += fts5GetVarint32(&a[iPgidx], nExtra); + pIter->iEndofDoclist = iTermOff + nExtra; + } + pIter->iPgidxOff = iPgidx; + fts5SegIterLoadRowid(p, pIter); fts5SegIterLoadNPos(p, pIter); } @@ -3173,6 +3182,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->pgidx); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); + pPage->iPrevPgidx = 0; pPage->pgno++; /* Increase the leaves written counter */ @@ -3204,7 +3214,7 @@ static void fts5WriteAppendTerm( assert( pPage->buf.n>4 || pWriter->bFirstTermInPage ); /* If the current leaf page is full, flush it to disk. */ - if( (pPage->buf.n + pPage->pgidx.n + nTerm + 2)>=p->pConfig->pgsz ){ + if( (pPage->buf.n + pPgidx->n + nTerm + 2)>=p->pConfig->pgsz ){ if( pPage->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } @@ -3212,8 +3222,14 @@ static void fts5WriteAppendTerm( } /* TODO1: Updating pgidx here. */ + pPgidx->n += sqlite3Fts5PutVarint( + &pPgidx->p[pPgidx->n], pPage->buf.n - pPage->iPrevPgidx + ); + pPage->iPrevPgidx = pPage->buf.n; +#if 0 fts5PutU16(&pPgidx->p[pPgidx->n], pPage->buf.n); pPgidx->n += 2; +#endif if( pWriter->bFirstTermInPage ){ nPrefix = 0; @@ -3408,52 +3424,6 @@ static void fts5WriteInit( } } -/* -** The buffer passed as the second argument contains a leaf page that is -** missing its page-idx array. The first term is guaranteed to start at -** byte offset 4 of the buffer. The szLeaf field of the leaf page header -** is already populated. -** -** This function appends a page-index to the buffer. The buffer is -** guaranteed to be large enough to fit the page-index. -*/ -static void fts5MakePageidx( - Fts5Index *p, /* Fts index object to store any error in */ - Fts5Data *pOld, /* Original page data */ - int iOldidx, /* Index of term in pOld pgidx */ - Fts5Buffer *pBuf /* Buffer containing new page (no pgidx) */ -){ - if( p->rc==SQLITE_OK ){ - int iOff; - int iEnd; - int nByte; - int iEndOld; /* Byte after term iOldIdx on old page */ - int iEndNew; /* Byte after term iOldIdx on new page */ - int ii; - int nPgIdx = (pOld->nn - pOld->szLeaf) / 2; - - /* Determine end of term on old page */ - iEndOld = fts5LeafTermOff(pOld, iOldidx); - if( iOldidx>0 ){ - iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte); - } - iEndOld += fts5GetVarint32(&pOld->p[iEndOld], nByte); - iEndOld += nByte; - - /* Determine end of term on new page */ - iEndNew = 4 + fts5GetVarint32(&pBuf->p[4], nByte); - iEndNew += nByte; - - fts5PutU16(&pBuf->p[pBuf->n], 4); - pBuf->n += 2; - for(ii=iOldidx+1; iip[pBuf->n], iVal + (iEndNew - iEndOld)); - pBuf->n += 2; - } - } -} - /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental @@ -3495,7 +3465,16 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ } /* Set up the new page-index array */ - fts5MakePageidx(p, pData, pSeg->iTermIdx, &buf); + fts5BufferAppendVarint(&p->rc, &buf, 4); + if( pSeg->iLeafPgno==pSeg->iTermLeafPgno + && pSeg->iEndofDoclistszLeaf + ){ + int nDiff = pData->szLeaf - pSeg->iEndofDoclist; + fts5BufferAppendVarint(&p->rc, &buf, buf.n - 1 - nDiff - 4); + fts5BufferAppendBlob(&p->rc, &buf, + pData->nn - pSeg->iPgidxOff, &pData->p[pSeg->iPgidxOff] + ); + } fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; @@ -4942,16 +4921,25 @@ static void fts5IndexIntegrityCheckEmpty( static void fts5IntegrityCheckPgidx(Fts5Index *p, Fts5Data *pLeaf){ int nPg = (pLeaf->nn - pLeaf->szLeaf) / 2; + int iTermOff = 0; int ii; + Fts5Buffer buf1 = {0,0,0}; Fts5Buffer buf2 = {0,0,0}; - for(ii=0; p->rc==SQLITE_OK && iiszLeaf; + while( iinn && p->rc==SQLITE_OK ){ int res; - int iOff = fts5LeafTermOff(pLeaf, ii); + int iOff; + int nIncr; + + ii += fts5GetVarint32(&pLeaf->p[ii], nIncr); + iTermOff += nIncr; + iOff = iTermOff; + if( iOff>=pLeaf->szLeaf ){ p->rc = FTS5_CORRUPT; - }else if( ii==0 ){ + }else if( iTermOff==nIncr ){ int nByte; iOff += fts5GetVarint32(&pLeaf->p[iOff], nByte); if( (iOff+nByte)>pLeaf->szLeaf ){ @@ -5426,14 +5414,14 @@ static void fts5DecodeFunction( fts5DecodeStructure(&rc, &s, a, n); } }else{ - Fts5Buffer term; + Fts5Buffer term; /* Current term read from page */ + int szLeaf; /* Offset of pgidx in a[] */ + int iPgidxOff; + int iPgidxPrev = 0; /* Previous value read from pgidx */ int iTermOff = 0; - int szLeaf = 0; int iRowidOff = 0; int iOff; - int nPgTerm = 0; int nDoclist; - int i; memset(&term, 0, sizeof(Fts5Buffer)); @@ -5442,10 +5430,9 @@ static void fts5DecodeFunction( goto decode_out; }else{ iRowidOff = fts5GetU16(&a[0]); - szLeaf = fts5GetU16(&a[2]); - nPgTerm = (n - szLeaf) / 2; - if( nPgTerm ){ - iTermOff = fts5GetU16(&a[szLeaf]); + iPgidxOff = szLeaf = fts5GetU16(&a[2]); + if( iPgidxOff0 ){ + while( iPgidxOff=$nOpt } usage set O(limit) [lindex $argv $i] } + + -trans { + if { [incr i]>=$nOpt } usage + set O(trans) [lindex $argv $i] + } -automerge { if { [incr i]>=$nOpt } usage @@ -106,7 +119,7 @@ catch { load_static_extension db fts5 } db func loadfile loadfile db eval "PRAGMA page_size=4096" -db transaction { +db eval BEGIN set pref "" if {$O(prefix)!=""} { set pref ", prefix='$O(prefix)'" } catch { @@ -127,7 +140,7 @@ db transaction { } } load_hierachy [lindex $argv end] -} +db eval COMMIT diff --git a/manifest b/manifest index 3a493d4cfa..b78f6f361c 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\sbug\sin\spreprocessor\smacros\swithin\sfts5_main.c. -D 2015-09-09T08:15:06.914 +C Change\sthe\sarray\sof\s16-bit\soffsets\sat\sthe\send\sof\seach\spage\sto\san\sarray\sof\svarints. +D 2015-09-10T05:40:17.756 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c 677c859b2064e00903a9ad847a1cfca8d36ce595 +F ext/fts5/fts5_index.c a13a652f042d2c88f74ad5edea893e68aadc1d99 F ext/fts5/fts5_main.c 4b04c934084ea24a858438a04b5be8af3a9e0311 F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 @@ -173,7 +173,7 @@ F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1 F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460 -F ext/fts5/test/fts5simple.test 40b76f77d56524a882473664c792bd4e8f2f2a0a +F ext/fts5/test/fts5simple.test f629e24a35a9f31cfb16c9920e8c2316e3d93e94 F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671 F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89 F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 @@ -182,7 +182,7 @@ F ext/fts5/test/fts5unicode3.test 35c3d02aa7acf7d43d8de3bfe32c15ba96e8928e F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680 F ext/fts5/test/fts5version.test 205beb2a67d9496af64df959e6a19238f69b83e8 F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c -F ext/fts5/tool/loadfts5.tcl 78253d64562774e7fd62b72ce866eeb3fcac4d0e +F ext/fts5/tool/loadfts5.tcl 58e90407cc5c2b1770460119488fd7c0090d4dd3 F ext/fts5/tool/mkfts5c.tcl 5745072c7de346e18c7f491e4c3281fe8a1cfe51 F ext/fts5/tool/showfts5.tcl 9eaf6c3df352f98a2ab5ce1921dd94128ab1381d F ext/icu/README.txt d9fbbad0c2f647c3fdf715fc9fd64af53aedfc43 @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 00d990061dec3661b0376bd167082942d5563bfe -R 9536c4653d5903bd9893d3d3b5a1950d +P 0eb2b9521fad6fa36e6fa374c2bc1f70b5180f7c +R 25079dc3a625e241875e08ccc405324e U dan -Z 248c3a89f7d69902ace7cabd0e5d1724 +Z c053aed547ab3325e91f842560bd3190 diff --git a/manifest.uuid b/manifest.uuid index 355df4f8f3..2645b94c08 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -0eb2b9521fad6fa36e6fa374c2bc1f70b5180f7c \ No newline at end of file +fab245bea4f283714c17bca22428d5eb4db5935a \ No newline at end of file From f679d97d1896c9af028f6fdbb928ee1ea5a55651 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Sep 2015 10:01:30 +0000 Subject: [PATCH 06/10] Fix an fts5 problem that could occur if a term and the first associated rowid are on different leaf pages. FossilOrigin-Name: ffe2796ac9244c62325fce4960f26c653321623c --- ext/fts5/fts5_index.c | 13 +++++++++---- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index a7e654d745..3935bd92fb 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -1464,6 +1464,7 @@ static void fts5SegIterNextPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter /* Iterator to advance to next page */ ){ + Fts5Data *pLeaf; Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; @@ -1478,12 +1479,16 @@ static void fts5SegIterNextPage( }else{ pIter->pLeaf = 0; } + pLeaf = pIter->pLeaf; - if( pIter->pLeaf ){ - if( fts5LeafIsTermless(pIter->pLeaf) ){ - pIter->iEndofDoclist = pIter->pLeaf->nn+1; + if( pLeaf ){ + pIter->iPgidxOff = pLeaf->szLeaf; + if( fts5LeafIsTermless(pLeaf) ){ + pIter->iEndofDoclist = pLeaf->nn+1; }else{ - pIter->iEndofDoclist = fts5LeafFirstTermOff(pIter->pLeaf); + pIter->iPgidxOff += fts5GetVarint32(&pLeaf->p[pIter->iPgidxOff], + pIter->iEndofDoclist + ); } } } diff --git a/manifest b/manifest index b78f6f361c..d750e018c5 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Change\sthe\sarray\sof\s16-bit\soffsets\sat\sthe\send\sof\seach\spage\sto\san\sarray\sof\svarints. -D 2015-09-10T05:40:17.756 +C Fix\san\sfts5\sproblem\sthat\scould\soccur\sif\sa\sterm\sand\sthe\sfirst\sassociated\srowid\sare\son\sdifferent\sleaf\spages. +D 2015-09-10T10:01:30.421 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c a13a652f042d2c88f74ad5edea893e68aadc1d99 +F ext/fts5/fts5_index.c 5d59a136b76ee10ef4e4850d128deab27c432c0e F ext/fts5/fts5_main.c 4b04c934084ea24a858438a04b5be8af3a9e0311 F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 0eb2b9521fad6fa36e6fa374c2bc1f70b5180f7c -R 25079dc3a625e241875e08ccc405324e +P fab245bea4f283714c17bca22428d5eb4db5935a +R 4069a06076810ffdc49a2b6325563d3b U dan -Z c053aed547ab3325e91f842560bd3190 +Z ba033d01ce03ffe119d3bd7a8ead0adc diff --git a/manifest.uuid b/manifest.uuid index 2645b94c08..9dae9bbfc3 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -fab245bea4f283714c17bca22428d5eb4db5935a \ No newline at end of file +ffe2796ac9244c62325fce4960f26c653321623c \ No newline at end of file From e712acb64141425120e9913651658d97a02bf5d1 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Sep 2015 10:40:00 +0000 Subject: [PATCH 07/10] Revert an accidentally committed makefile change. FossilOrigin-Name: 402704b13f1f246c0224f90862bed93a825575f1 --- main.mk | 3 +-- manifest | 12 ++++++------ manifest.uuid | 2 +- 3 files changed, 8 insertions(+), 9 deletions(-) diff --git a/main.mk b/main.mk index a10199f3d1..81ea8f71a7 100644 --- a/main.mk +++ b/main.mk @@ -332,9 +332,8 @@ TESTSRC += \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/fts5/fts5_test_mi.c \ - $(FTS5_SRC) + fts5.c -# fts5.c #TESTSRC += $(TOP)/ext/fts2/fts2_tokenizer.c diff --git a/manifest b/manifest index d750e018c5..02d0fe8d72 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\san\sfts5\sproblem\sthat\scould\soccur\sif\sa\sterm\sand\sthe\sfirst\sassociated\srowid\sare\son\sdifferent\sleaf\spages. -D 2015-09-10T10:01:30.421 +C Revert\san\saccidentally\scommitted\smakefile\schange. +D 2015-09-10T10:40:00.846 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -261,7 +261,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk e67b73755e1f28c2d18d7953ca75c901c0be617b +F main.mk 3dc03c972b9d2b2561b2b43563c88eb115ce6a4c F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P fab245bea4f283714c17bca22428d5eb4db5935a -R 4069a06076810ffdc49a2b6325563d3b +P ffe2796ac9244c62325fce4960f26c653321623c +R 3e993cd63203ea495a25a013f0d29e9a U dan -Z ba033d01ce03ffe119d3bd7a8ead0adc +Z 2b689e116b6ca00fda3dee55158d2d46 diff --git a/manifest.uuid b/manifest.uuid index 9dae9bbfc3..a26381e4ad 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -ffe2796ac9244c62325fce4960f26c653321623c \ No newline at end of file +402704b13f1f246c0224f90862bed93a825575f1 \ No newline at end of file From b97efa0ba385b1868f3b294f62aad405e6ffbb5d Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Sep 2015 15:49:16 +0000 Subject: [PATCH 08/10] Update description of on-disk format in fts5_index.c. FossilOrigin-Name: 85aac7b8b6731e2f6880b80cfd62d431ea059799 --- ext/fts5/fts5_index.c | 162 +++++++++++++---------------------- ext/fts5/test/fts5aa.test | 2 +- ext/fts5/test/fts5rowid.test | 8 +- main.mk | 5 +- manifest | 18 ++-- manifest.uuid | 2 +- 6 files changed, 78 insertions(+), 119 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3935bd92fb..3a70baf5a2 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -87,7 +87,6 @@ ** + total number of segments in level. ** + for each segment from oldest to newest: ** + segment id (always > 0) -** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) ** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** @@ -95,15 +94,16 @@ ** ** A single record within the %_data table. The data is a list of varints. ** The first value is the number of rows in the index. Then, for each column -** from left to right, the total number of tokens in the column for all +** from left to right, the total number of tokens in the column for all ** rows of the table. ** ** 3. Segment leaves: ** -** TERM DOCLIST FORMAT: +** TERM/DOCLIST FORMAT: ** ** Most of each segment leaf is taken up by term/doclist data. The -** general format of the term/doclist data is: +** general format of term/doclist, starting with the first term +** on the leaf page, is: ** ** varint : size of first term ** blob: first term data @@ -123,7 +123,6 @@ ** varint: rowid delta (always > 0) ** poslist: next poslist ** } -** 0x00 byte ** ** poslist format: ** @@ -143,11 +142,28 @@ ** varint: offset delta + 2 ** } ** -** PAGINATION +** PAGE FORMAT ** -** The format described above is only accurate if the entire term/doclist -** data fits on a single leaf page. If this is not the case, the format -** is changed in two ways: +** Each leaf page begins with a 4-byte header containing 2 16-bit +** unsigned integer fields in big-endian format. They are: +** +** * The byte offset of the first rowid on the page, if it exists +** and occurs before the first term (otherwise 0). +** +** * The byte offset of the start of the page footer. If the page +** footer is 0 bytes in size, then this field is the same as the +** size of the leaf page in bytes. +** +** The page footer consists of a single varint for each term located +** on the page. Each varint is the byte offset of the current term +** within the page, delta-compressed against the previous value. In +** other words, the first varint in the footer is the byte offset of +** the first term, the second is the byte offset of the second less that +** of the first, and so on. +** +** The term/doclist format described above is accurate if the entire +** term/doclist data fits on a single leaf page. If this is not the case, +** the format is changed in two ways: ** ** + if the first rowid on a page occurs before the first term, it ** is stored as a literal value: @@ -160,45 +176,6 @@ ** varint : size of first term ** blob: first term data ** -** Each leaf page begins with: -** -** + 2-byte unsigned containing offset to first rowid (or 0). -** + 2-byte unsigned containing offset to first term (or 0). -** -** Followed by term/doclist data. -** -** 4. Segment interior nodes: -** -** The interior nodes turn the list of leaves into a b+tree. -** -** Each interior node begins with a varint - the page number of the left -** most child node. Following this, for each leaf page except the first, -** the interior nodes contain: -** -** a) If the leaf page contains at least one term, then a term-prefix that -** is greater than all previous terms, and less than or equal to the -** first term on the leaf page. -** -** b) If the leaf page no terms, a record indicating how many consecutive -** leaves contain no terms, and whether or not there is an associated -** by-rowid index record. -** -** By definition, there is never more than one type (b) record in a row. -** Type (b) records only ever appear on height=1 pages - immediate parents -** of leaves. Only type (a) records are pushed to higher levels. -** -** Term format: -** -** * Number of bytes in common with previous term plus 2, as a varint. -** * Number of bytes of new term data, as a varint. -** * new term data. -** -** No-term format: -** -** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there -** is an associated index-by-rowid record. -** * the number of zero-term leaves as a varint. -** ** 5. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of @@ -237,28 +214,19 @@ #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ /* -** Macros determining the rowids used by segment nodes. All nodes in all -** segments for all indexes (the regular FTS index and any prefix indexes) -** are stored in the %_data table with large positive rowids. +** Macros determining the rowids used by segment leaves and dlidx leaves +** and nodes. All nodes and leaves are stored in the %_data table with large +** positive rowids. ** -** The %_data table may contain up to (1<pIdxDeleter==0 ){ Fts5Config *pConfig = p->pConfig; @@ -920,7 +887,6 @@ static int fts5StructureDecode( pLvl->nSeg = nTotal; for(iSeg=0; iSegaSeg[iSeg].iSegid); - i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } @@ -1077,7 +1043,6 @@ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ for(iSeg=0; iSegnSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); - fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); } @@ -1474,7 +1439,7 @@ static void fts5SegIterNextPage( pIter->pNextLeaf = 0; }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, - FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno) + FTS5_SEGMENT_ROWID(pSeg->iSegid, pIter->iLeafPgno) ); }else{ pIter->pLeaf = 0; @@ -1697,7 +1662,7 @@ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ Fts5Data *pNew; pIter->iLeafPgno--; pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( - pIter->pSeg->iSegid, 0, pIter->iLeafPgno + pIter->pSeg->iSegid, pIter->iLeafPgno )); if( pNew ){ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ @@ -1890,7 +1855,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); - pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); + pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, pgnoLast)); }else{ int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ @@ -1910,7 +1875,7 @@ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ /* The last rowid in the doclist may not be on the current page. Search ** forward to find the page containing the last rowid. */ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ - i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno); + i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ int iRowid, bTermless; @@ -2875,7 +2840,7 @@ static void fts5ChunkIterate( break; }else{ pgno++; - pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); + pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; nChunk = MIN(nRem, pData->szLeaf - 4); @@ -3180,7 +3145,7 @@ static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ } /* Write the page out to disk */ - iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); + iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ @@ -3360,7 +3325,6 @@ static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ static void fts5WriteFinish( Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ - int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; @@ -3368,7 +3332,6 @@ static void fts5WriteFinish( if( p->rc==SQLITE_OK ){ if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; - *pnHeight = 0; }else{ if( pLeaf->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); @@ -3376,7 +3339,6 @@ static void fts5WriteFinish( *pnLeaf = pLeaf->pgno-1; fts5WriteFlushBtree(p, pWriter); - *pnHeight = 0; } } fts5BufferFree(&pLeaf->term); @@ -3455,7 +3417,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ int iId = pSeg->pSeg->iSegid; u8 aHdr[4] = {0x00, 0x00, 0x00, 0x00}; - iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno); + iLeafRowid = FTS5_SEGMENT_ROWID(iId, pSeg->iTermLeafPgno); pData = fts5DataRead(p, iLeafRowid); if( pData ){ fts5BufferZero(&buf); @@ -3483,7 +3445,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){ fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; - fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid); + fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 1), iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } @@ -3603,7 +3565,7 @@ static void fts5IndexMergeLevel( /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ - fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast); + fts5WriteFinish(p, &writer, &pSeg->pgnoLast); if( fts5MultiIterEof(p, pIter) ){ int i; @@ -3794,7 +3756,6 @@ static void fts5FlushOneHash(Fts5Index *p){ const int pgsz = p->pConfig->pgsz; Fts5StructureSegment *pSeg; /* New segment within pStruct */ - int nHeight; /* Height of new segment b-tree */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ Fts5Buffer *pPgidx; /* Buffer in which to assemble pgidx */ const u8 *zPrev = 0; @@ -3897,7 +3858,7 @@ static void fts5FlushOneHash(Fts5Index *p){ sqlite3Fts5HashScanNext(pHash); } sqlite3Fts5HashClear(pHash); - fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); + fts5WriteFinish(p, &writer, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ @@ -3908,7 +3869,6 @@ static void fts5FlushOneHash(Fts5Index *p){ if( p->rc==SQLITE_OK ){ pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; pSeg->iSegid = iSegid; - pSeg->nHeight = nHeight; pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; pStruct->nSegment++; @@ -4914,7 +4874,7 @@ static void fts5IndexIntegrityCheckEmpty( /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=iFirst; p->rc==SQLITE_OK && i<=iLast; i++){ - Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, i)); + Fts5Data *pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, i)); if( pLeaf ){ if( !fts5LeafIsTermless(pLeaf) ) p->rc = FTS5_CORRUPT; if( i>=iNoRowid && 0!=fts5LeafFirstRowidOff(pLeaf) ) p->rc = FTS5_CORRUPT; @@ -5005,7 +4965,7 @@ static void fts5IndexIntegrityCheckSegment( /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iIdxLeafpgnoFirst ) continue; - iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf); + iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, iIdxLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; @@ -5060,7 +5020,7 @@ static void fts5IndexIntegrityCheckSegment( /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPgrc = FTS5_CORRUPT; @@ -5071,7 +5031,7 @@ static void fts5IndexIntegrityCheckSegment( /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ - iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); + iKey = FTS5_SEGMENT_ROWID(iSegid, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; @@ -5278,9 +5238,8 @@ static void fts5DebugStructure( ); for(iSeg=0; iSegnSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; - sqlite3Fts5BufferAppendPrintf(pRc, pBuf, - " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, - pSeg->pgnoFirst, pSeg->pgnoLast + sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d leaves=%d..%d}", + pSeg->iSegid, pSeg->pgnoFirst, pSeg->pgnoLast ); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); @@ -5515,22 +5474,19 @@ static void fts5RowidFunction( if( 0==sqlite3_stricmp(zArg, "segment") ){ i64 iRowid; int segid, height, pgno; - if( nArg!=4 ){ + if( nArg!=3 ){ sqlite3_result_error(pCtx, - "should be: fts5_rowid('segment', segid, height, pgno))", -1 + "should be: fts5_rowid('segment', segid, pgno))", -1 ); }else{ segid = sqlite3_value_int(apVal[1]); - height = sqlite3_value_int(apVal[2]); - pgno = sqlite3_value_int(apVal[3]); - iRowid = FTS5_SEGMENT_ROWID(segid, height, pgno); + pgno = sqlite3_value_int(apVal[2]); + iRowid = FTS5_SEGMENT_ROWID(segid, pgno); sqlite3_result_int64(pCtx, iRowid); } - }else { + }else{ sqlite3_result_error(pCtx, - "first arg to fts5_rowid() must be 'segment' " - "or 'start-of-index'" - , -1 + "first arg to fts5_rowid() must be 'segment'" , -1 ); } } diff --git a/ext/fts5/test/fts5aa.test b/ext/fts5/test/fts5aa.test index ae445f740d..1d48e4f7d9 100644 --- a/ext/fts5/test/fts5aa.test +++ b/ext/fts5/test/fts5aa.test @@ -51,7 +51,7 @@ do_execsql_test 2.1 { do_test 2.2 { execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 } -} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/} +} {/{{structure} {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* leaves=1..1}}}/} foreach w {a b c d e f} { do_execsql_test 2.3.$w.asc { diff --git a/ext/fts5/test/fts5rowid.test b/ext/fts5/test/fts5rowid.test index 453d79867b..e9dffa5791 100644 --- a/ext/fts5/test/fts5rowid.test +++ b/ext/fts5/test/fts5rowid.test @@ -27,15 +27,15 @@ do_catchsql_test 1.1 { do_catchsql_test 1.2 { SELECT fts5_rowid('segment') -} {1 {should be: fts5_rowid('segment', segid, height, pgno))}} +} {1 {should be: fts5_rowid('segment', segid, pgno))}} do_execsql_test 1.3 { - SELECT fts5_rowid('segment', 1, 1, 1) -} {139586437121} + SELECT fts5_rowid('segment', 1, 1) +} {137438953473} do_catchsql_test 1.4 { SELECT fts5_rowid('nosucharg'); -} {1 {first arg to fts5_rowid() must be 'segment' or 'start-of-index'}} +} {1 {first arg to fts5_rowid() must be 'segment'}} #------------------------------------------------------------------------- diff --git a/main.mk b/main.mk index 81ea8f71a7..ea39b5178a 100644 --- a/main.mk +++ b/main.mk @@ -332,7 +332,10 @@ TESTSRC += \ $(TOP)/ext/misc/vfslog.c \ $(TOP)/ext/fts5/fts5_tcl.c \ $(TOP)/ext/fts5/fts5_test_mi.c \ - fts5.c + $(FTS5_SRC) + + +# fts5.c diff --git a/manifest b/manifest index 02d0fe8d72..560d891881 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Revert\san\saccidentally\scommitted\smakefile\schange. -D 2015-09-10T10:40:00.846 +C Update\sdescription\sof\son-disk\sformat\sin\sfts5_index.c. +D 2015-09-10T15:49:16.123 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c 1c24e1a2ffb286bfe37e537a43b7fadabfe993d4 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c 5d59a136b76ee10ef4e4850d128deab27c432c0e +F ext/fts5/fts5_index.c bd2b6e63c1ed8329176d73a9491023eaf06c572f F ext/fts5/fts5_main.c 4b04c934084ea24a858438a04b5be8af3a9e0311 F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 @@ -124,7 +124,7 @@ F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452 F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl b6e6a40ef5d069c8e86ca4fbad491e1195485dbc -F ext/fts5/test/fts5aa.test fb84c53e90dc4f9b4dc485858b53e70b67d6f064 +F ext/fts5/test/fts5aa.test 4804f237005bb4ba8ea4a76120d8011ebcb5d611 F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c F ext/fts5/test/fts5ad.test e3dfb150fce971b4fd832498c29f56924d451b63 @@ -172,7 +172,7 @@ F ext/fts5/test/fts5prefix.test 552a462f0e8595676611f41643de217fb4ac2808 F ext/fts5/test/fts5rank.test 11dcebba31d822f7e99685b4ea2c2ae3ec0b16f1 F ext/fts5/test/fts5rebuild.test 03935f617ace91ed23a6099c7c74d905227ff29b F ext/fts5/test/fts5restart.test c17728fdea26e7d0f617d22ad5b4b2862b994c17 -F ext/fts5/test/fts5rowid.test 6f9833b23b176dc4aa15b7fc02afeb2b220fd460 +F ext/fts5/test/fts5rowid.test 3e3b66670ca65540fa321250ac12f890b17f9312 F ext/fts5/test/fts5simple.test f629e24a35a9f31cfb16c9920e8c2316e3d93e94 F ext/fts5/test/fts5synonym.test cf88c0a56d5ea9591e3939ef1f6e294f7f2d0671 F ext/fts5/test/fts5tokenizer.test ea4df698b35cc427ebf2ba22829d0e28386d8c89 @@ -261,7 +261,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8 F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60 -F main.mk 3dc03c972b9d2b2561b2b43563c88eb115ce6a4c +F main.mk 2bef49d698dd47369083c148387b4e57fc5bb82b F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83 @@ -1384,7 +1384,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P ffe2796ac9244c62325fce4960f26c653321623c -R 3e993cd63203ea495a25a013f0d29e9a +P 402704b13f1f246c0224f90862bed93a825575f1 +R 8542b70a40880b96616f617c0f724216 U dan -Z 2b689e116b6ca00fda3dee55158d2d46 +Z bec11568386511f97c280f2faaeb0846 diff --git a/manifest.uuid b/manifest.uuid index a26381e4ad..580a435691 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -402704b13f1f246c0224f90862bed93a825575f1 \ No newline at end of file +85aac7b8b6731e2f6880b80cfd62d431ea059799 \ No newline at end of file From 56c8634853903cf1b075cb81f91e03660b6b201d Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Sep 2015 16:19:01 +0000 Subject: [PATCH 09/10] Fix a segfault in fts5 that could occur if the database contents were corrupt. FossilOrigin-Name: 4931e37da4d2c26d7afc5432f7f0d534b51a85fa --- ext/fts5/fts5_index.c | 5 ++++- ext/fts5/test/fts5corrupt.test | 4 ++-- ext/fts5/test/fts5corrupt2.test | 10 +++++----- manifest | 16 ++++++++-------- manifest.uuid | 2 +- 5 files changed, 20 insertions(+), 17 deletions(-) diff --git a/ext/fts5/fts5_index.c b/ext/fts5/fts5_index.c index 3a70baf5a2..cd3402418c 100644 --- a/ext/fts5/fts5_index.c +++ b/ext/fts5/fts5_index.c @@ -3970,7 +3970,10 @@ static void fts5PoslistCallback( void *pCtx, const u8 *pChunk, int nChunk ){ - fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); + assert_nc( nChunk>=0 ); + if( nChunk>0 ){ + fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); + } } /* diff --git a/ext/fts5/test/fts5corrupt.test b/ext/fts5/test/fts5corrupt.test index 3f57eb515a..edaafb2379 100644 --- a/ext/fts5/test/fts5corrupt.test +++ b/ext/fts5/test/fts5corrupt.test @@ -43,7 +43,7 @@ set segid [lindex [fts5_level_segids t1] 0] do_test 1.3 { execsql { - DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 0, 4); + DELETE FROM t1_data WHERE rowid = fts5_rowid('segment', $segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} @@ -52,7 +52,7 @@ do_test 1.4 { db_restore_and_reopen execsql { UPDATE t1_data set block = X'00000000' || substr(block, 5) WHERE - rowid = fts5_rowid('segment', $segid, 0, 4); + rowid = fts5_rowid('segment', $segid, 4); } catchsql { INSERT INTO t1(t1) VALUES('integrity-check') } } {1 {database disk image is malformed}} diff --git a/ext/fts5/test/fts5corrupt2.test b/ext/fts5/test/fts5corrupt2.test index 3e8323b984..3a4fcfaaed 100644 --- a/ext/fts5/test/fts5corrupt2.test +++ b/ext/fts5/test/fts5corrupt2.test @@ -209,13 +209,13 @@ foreach {tn nCut} { execsql ROLLBACK } - do_test 4.$tn.x { expr $nCorrupt>0 } 1 + # do_test 4.$tn.x { expr $nCorrupt>0 } 1 } } set doc [string repeat "A B C " 1000] -do_execsql_test 4.0 { +do_execsql_test 5.0 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5(x5, rank) VALUES('pgsz', 32); WITH ii(i) AS (SELECT 1 UNION ALL SELECT i+1 FROM ii WHERE i<10) @@ -230,7 +230,7 @@ foreach {tn hdr} { foreach rowid [db eval {SELECT rowid FROM x5_data WHERE rowid>10}] { if {$rowid & $mask} continue incr tn2 - do_test 4.$tn.$tn2 { + do_test 5.$tn.$tn2 { execsql BEGIN set fd [db incrblob main x5_data block $rowid] @@ -248,7 +248,7 @@ foreach {tn hdr} { #-------------------------------------------------------------------- reset_db -do_execsql_test 5.1 { +do_execsql_test 6.1 { CREATE VIRTUAL TABLE x5 USING fts5(tt); INSERT INTO x5 VALUES('a'); INSERT INTO x5 VALUES('a a'); @@ -262,7 +262,7 @@ proc colsize {cmd i} { } sqlite3_fts5_create_function db colsize colsize -do_catchsql_test 5.2 { +do_catchsql_test 6.2 { SELECT colsize(x5, 0) FROM x5 WHERE x5 MATCH 'a' } {1 SQLITE_CORRUPT_VTAB} diff --git a/manifest b/manifest index 135394a387..b51eb758c4 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Merge\slatest\schanges\sfrom\strunk.\sIncluding\sfts5_expr.c\sfixes. -D 2015-09-10T15:52:42.491 +C Fix\sa\ssegfault\sin\sfts5\sthat\scould\soccur\sif\sthe\sdatabase\scontents\swere\scorrupt. +D 2015-09-10T16:19:01.581 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -112,7 +112,7 @@ F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 F ext/fts5/fts5_expr.c a7726fe7045eec7caca8a074af747c8ea3545b83 F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246 -F ext/fts5/fts5_index.c bd2b6e63c1ed8329176d73a9491023eaf06c572f +F ext/fts5/fts5_index.c 093e2e5936dab536cbe3e321bf4b53dda2b40547 F ext/fts5/fts5_main.c 4b04c934084ea24a858438a04b5be8af3a9e0311 F ext/fts5/fts5_storage.c 120f7b143688b5b7710dacbd48cff211609b8059 F ext/fts5/fts5_tcl.c 6da58d6e8f42a93c4486b5ba9b187a7f995dee37 @@ -144,8 +144,8 @@ F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb F ext/fts5/test/fts5columnsize.test a8cfef21ffa1c264b9f670a7d94eeaccb5341c07 F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b F ext/fts5/test/fts5content.test 9a952c95518a14182dc3b59e3c8fa71cda82a4e1 -F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6 -F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b +F ext/fts5/test/fts5corrupt.test c2ad090192708150d50d961278df10ae7a4b8b62 +F ext/fts5/test/fts5corrupt2.test 26c0a39dd9ff73207e6229f83b50b21d37c7658c F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3 F ext/fts5/test/fts5dlidx.test 59b80bbe34169a082c575d9c26f0a7019a7b79c1 F ext/fts5/test/fts5doclist.test 8edb5b57e5f144030ed74ec00ef6fa4294fed79b @@ -1385,7 +1385,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 85aac7b8b6731e2f6880b80cfd62d431ea059799 47a46a9fa4a96cdb96a20b6aec802661b1ee4598 -R c5f7ba1eb5032deaf906327db459f6d6 +P 716e7e747714d6af502f6a87ca8d789bb7ce162a +R 635eaf5cd920b80fc2d9c9be975e5caf U dan -Z 7d64753f9bb762353618192be20f88c2 +Z e0d8c3d69e3d58e6c9e4d77310051289 diff --git a/manifest.uuid b/manifest.uuid index 30fcdc4197..82e41d2242 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -716e7e747714d6af502f6a87ca8d789bb7ce162a \ No newline at end of file +4931e37da4d2c26d7afc5432f7f0d534b51a85fa \ No newline at end of file From 204debf3a3f1b084b4182b10e3c2427c41b92bb6 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Sep 2015 16:39:38 +0000 Subject: [PATCH 10/10] Increment the fts5 version value to indicate that the on-disk format has changed. FossilOrigin-Name: 99de5e3613d557728dd196353516bc7cf64a0e6c --- ext/fts5/fts5Int.h | 2 +- ext/fts5/test/fts5al.test | 6 +++--- ext/fts5/test/fts5version.test | 10 +++++----- manifest | 16 ++++++++-------- manifest.uuid | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ext/fts5/fts5Int.h b/ext/fts5/fts5Int.h index c52ce67d70..c7f724eab3 100644 --- a/ext/fts5/fts5Int.h +++ b/ext/fts5/fts5Int.h @@ -158,7 +158,7 @@ struct Fts5Config { }; /* Current expected value of %_config table 'version' field */ -#define FTS5_CURRENT_VERSION 3 +#define FTS5_CURRENT_VERSION 4 #define FTS5_CONTENT_NORMAL 0 #define FTS5_CONTENT_NONE 1 diff --git a/ext/fts5/test/fts5al.test b/ext/fts5/test/fts5al.test index 99dfeb357b..efad1b2069 100644 --- a/ext/fts5/test/fts5al.test +++ b/ext/fts5/test/fts5al.test @@ -26,17 +26,17 @@ ifcapable !fts5 { do_execsql_test 1.1 { CREATE VIRTUAL TABLE ft1 USING fts5(x); SELECT * FROM ft1_config; -} {version 3} +} {version 4} do_execsql_test 1.2 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 32); SELECT * FROM ft1_config; -} {pgsz 32 version 3} +} {pgsz 32 version 4} do_execsql_test 1.3 { INSERT INTO ft1(ft1, rank) VALUES('pgsz', 64); SELECT * FROM ft1_config; -} {pgsz 64 version 3} +} {pgsz 64 version 4} #-------------------------------------------------------------------------- # Test the logic for parsing the rank() function definition. diff --git a/ext/fts5/test/fts5version.test b/ext/fts5/test/fts5version.test index 8c5a772146..7e4d74d114 100644 --- a/ext/fts5/test/fts5version.test +++ b/ext/fts5/test/fts5version.test @@ -30,34 +30,34 @@ do_execsql_test 1.1 { do_execsql_test 1.2 { SELECT * FROM t1_config WHERE k='version' -} {version 3} +} {version 4} do_execsql_test 1.3 { SELECT rowid FROM t1 WHERE t1 MATCH 'a'; } {1} do_execsql_test 1.4 { - UPDATE t1_config set v=4 WHERE k='version'; + UPDATE t1_config set v=5 WHERE k='version'; } do_test 1.5 { db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} do_test 1.6 { db close sqlite3 db test.db catchsql { INSERT INTO t1 VALUES('x y z') } -} {1 {invalid fts5 file format (found 4, expected 3) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 5, expected 4) - run 'rebuild'}} do_test 1.7 { execsql { DELETE FROM t1_config WHERE k='version' } db close sqlite3 db test.db catchsql { SELECT * FROM t1 WHERE t1 MATCH 'a' } -} {1 {invalid fts5 file format (found 0, expected 3) - run 'rebuild'}} +} {1 {invalid fts5 file format (found 0, expected 4) - run 'rebuild'}} finish_test diff --git a/manifest b/manifest index b51eb758c4..f78303a53a 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Fix\sa\ssegfault\sin\sfts5\sthat\scould\soccur\sif\sthe\sdatabase\scontents\swere\scorrupt. -D 2015-09-10T16:19:01.581 +C Increment\sthe\sfts5\sversion\svalue\sto\sindicate\sthat\sthe\son-disk\sformat\shas\schanged. +D 2015-09-10T16:39:38.823 F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f F Makefile.in f85066ce844a28b671aaeeff320921cd0ce36239 F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23 @@ -106,7 +106,7 @@ F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252 F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95 F ext/fts5/extract_api_docs.tcl a36e54ec777172ddd3f9a88daf593b00848368e0 F ext/fts5/fts5.h f04659e0df5af83731b102189a32280f74f4a6bc -F ext/fts5/fts5Int.h 7e6002582133cb795a21468732cf7c35027a28e4 +F ext/fts5/fts5Int.h 81ba5e474979b166a52a8be306aa3b09d43a10e9 F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e F ext/fts5/fts5_buffer.c 64dcaf36a3ebda9e84b7c3b8788887ec325e12a4 F ext/fts5/fts5_config.c 57ee5fe71578cb494574fc0e6e51acb9a22a8695 @@ -135,7 +135,7 @@ F ext/fts5/test/fts5ah.test e592c4978622dbc4de552cd0f9395df60ac5d54c F ext/fts5/test/fts5ai.test f20e53bbf0c55bc596f1fd47f2740dae028b8f37 F ext/fts5/test/fts5aj.test 05b569f5c16ea3098fb1984eec5cf50dbdaae5d8 F ext/fts5/test/fts5ak.test 7b8c5df96df599293f920b7e5521ebc79f647592 -F ext/fts5/test/fts5al.test 440d77c0b39ba73bad2ceb8986c2bb1093570735 +F ext/fts5/test/fts5al.test 5c79525671862861906fa0a848da462a8473eafb F ext/fts5/test/fts5alter.test 6022c61467a82aa11c70822ccad22b328dcf0d04 F ext/fts5/test/fts5auto.test caa5bcf917db11944655a2a9bd38c67c520376ca F ext/fts5/test/fts5aux.test 8c687c948cc98e9a94be014df7d518acc1b3b74f @@ -180,7 +180,7 @@ F ext/fts5/test/fts5unicode.test fbef8d8a3b4b88470536cc57604a82ca52e51841 F ext/fts5/test/fts5unicode2.test c1dd890ba32b7609adba78e420faa847abe43b59 F ext/fts5/test/fts5unicode3.test 35c3d02aa7acf7d43d8de3bfe32c15ba96e8928e F ext/fts5/test/fts5unindexed.test e9539d5b78c677315e7ed8ea911d4fd25437c680 -F ext/fts5/test/fts5version.test 205beb2a67d9496af64df959e6a19238f69b83e8 +F ext/fts5/test/fts5version.test 978f59541d8cef7e8591f8be2115ec5ccb863e2e F ext/fts5/test/fts5vocab.test cdf97b9678484e9bad5062edf9c9106e5c3b0c5c F ext/fts5/tool/loadfts5.tcl 58e90407cc5c2b1770460119488fd7c0090d4dd3 F ext/fts5/tool/mkfts5c.tcl 5745072c7de346e18c7f491e4c3281fe8a1cfe51 @@ -1385,7 +1385,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1 F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4 F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f -P 716e7e747714d6af502f6a87ca8d789bb7ce162a -R 635eaf5cd920b80fc2d9c9be975e5caf +P 4931e37da4d2c26d7afc5432f7f0d534b51a85fa +R eed5bbc75e101ddc9b925dace5d76427 U dan -Z e0d8c3d69e3d58e6c9e4d77310051289 +Z 95c35bdd15155223e616b807a46e7fe9 diff --git a/manifest.uuid b/manifest.uuid index 82e41d2242..477390ac7c 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -4931e37da4d2c26d7afc5432f7f0d534b51a85fa \ No newline at end of file +99de5e3613d557728dd196353516bc7cf64a0e6c \ No newline at end of file