/* ** 2014 May 31 ** ** The author disclaims copyright to this source code. In place of ** a legal notice, here is a blessing: ** ** May you do good and not evil. ** May you find forgiveness for yourself and forgive others. ** May you share freely, never taking more than you give. ** ****************************************************************************** ** ** Low level access to the FTS index stored in the database file. The ** routines in this file file implement all read and write access to the ** %_data table. Other parts of the system access this functionality via ** the interface defined in fts5Int.h. */ #ifdef SQLITE_ENABLE_FTS5 #include "fts5Int.h" /* ** Overview: ** ** The %_data table contains all the FTS indexes for an FTS5 virtual table. ** As well as the main term index, there may be up to 31 prefix indexes. ** The format is similar to FTS3/4, except that: ** ** * all segment b-tree leaf data is stored in fixed size page records ** (e.g. 1000 bytes). A single doclist may span multiple pages. Care is ** taken to ensure it is possible to iterate in either direction through ** the entries in a doclist, or to seek to a specific entry within a ** doclist, without loading it into memory. ** ** * large doclists that span many pages have associated "doclist index" ** records that contain a copy of the first docid on each page spanned by ** the doclist. This is used to speed up seek operations, and merges of ** large doclists with very small doclists. ** ** * extra fields in the "structure record" record the state of ongoing ** incremental merge operations. ** */ #define FTS5_OPT_WORK_UNIT 1000 /* Number of leaf pages per optimize step */ #define FTS5_WORK_UNIT 64 /* Number of leaf pages in unit of work */ #define FTS5_MIN_DLIDX_SIZE 4 /* Add dlidx if this many empty pages */ #define FTS5_MAIN_PREFIX '0' #if FTS5_MAX_PREFIX_INDEXES > 31 # error "FTS5_MAX_PREFIX_INDEXES is too large" #endif /* ** Details: ** ** The %_data table managed by this module, ** ** CREATE TABLE %_data(id INTEGER PRIMARY KEY, block BLOB); ** ** , contains the following 5 types of records. See the comments surrounding ** the FTS5_*_ROWID macros below for a description of how %_data rowids are ** assigned to each fo them. ** ** 1. Structure Records: ** ** The set of segments that make up an index - the index structure - are ** recorded in a single record within the %_data table. The record consists ** of a single 32-bit configuration cookie value followed by a list of ** SQLite varints. If the FTS table features more than one index (because ** there are one or more prefix indexes), it is guaranteed that all share ** the same cookie value. ** ** Immediately following the configuration cookie, the record begins with ** three varints: ** ** + number of levels, ** + total number of segments on all levels, ** + value of write counter. ** ** Then, for each level from 0 to nMax: ** ** + number of input segments in ongoing merge. ** + total number of segments in level. ** + for each segment from oldest to newest: ** + segment id (always > 0) ** + b-tree height (1 -> root is leaf, 2 -> root is parent of leaf etc.) ** + first leaf page number (often 1, always greater than 0) ** + final leaf page number ** ** 2. The Averages Record: ** ** A single record within the %_data table. The data is a list of varints. ** The first value is the number of rows in the index. Then, for each column ** from left to right, the total number of tokens in the column for all ** rows of the table. ** ** 3. Segment leaves: ** ** TERM DOCLIST FORMAT: ** ** Most of each segment leaf is taken up by term/doclist data. The ** general format of the term/doclist data is: ** ** varint : size of first term ** blob: first term data ** doclist: first doclist ** zero-or-more { ** varint: number of bytes in common with previous term ** varint: number of bytes of new term data (nNew) ** blob: nNew bytes of new term data ** doclist: next doclist ** } ** ** doclist format: ** ** varint: first rowid ** poslist: first poslist ** zero-or-more { ** varint: rowid delta (always > 0) ** poslist: next poslist ** } ** 0x00 byte ** ** poslist format: ** ** varint: size of poslist in bytes multiplied by 2, not including ** this field. Plus 1 if this entry carries the "delete" flag. ** collist: collist for column 0 ** zero-or-more { ** 0x01 byte ** varint: column number (I) ** collist: collist for column I ** } ** ** collist format: ** ** varint: first offset + 2 ** zero-or-more { ** varint: offset delta + 2 ** } ** ** PAGINATION ** ** The format described above is only accurate if the entire term/doclist ** data fits on a single leaf page. If this is not the case, the format ** is changed in two ways: ** ** + if the first rowid on a page occurs before the first term, it ** is stored as a literal value: ** ** varint: first rowid ** ** + the first term on each page is stored in the same way as the ** very first term of the segment: ** ** varint : size of first term ** blob: first term data ** ** Each leaf page begins with: ** ** + 2-byte unsigned containing offset to first rowid (or 0). ** + 2-byte unsigned containing offset to first term (or 0). ** ** Followed by term/doclist data. ** ** 4. Segment interior nodes: ** ** The interior nodes turn the list of leaves into a b+tree. ** ** Each interior node begins with a varint - the page number of the left ** most child node. Following this, for each leaf page except the first, ** the interior nodes contain: ** ** a) If the leaf page contains at least one term, then a term-prefix that ** is greater than all previous terms, and less than or equal to the ** first term on the leaf page. ** ** b) If the leaf page no terms, a record indicating how many consecutive ** leaves contain no terms, and whether or not there is an associated ** by-rowid index record. ** ** By definition, there is never more than one type (b) record in a row. ** Type (b) records only ever appear on height=1 pages - immediate parents ** of leaves. Only type (a) records are pushed to higher levels. ** ** Term format: ** ** * Number of bytes in common with previous term plus 2, as a varint. ** * Number of bytes of new term data, as a varint. ** * new term data. ** ** No-term format: ** ** * either an 0x00 or 0x01 byte. If the value 0x01 is used, then there ** is an associated index-by-rowid record. ** * the number of zero-term leaves as a varint. ** ** 5. Segment doclist indexes: ** ** Doclist indexes are themselves b-trees, however they usually consist of ** a single leaf record only. The format of each doclist index leaf page ** is: ** ** * Flags byte. Bits are: ** 0x01: Clear if leaf is also the root page, otherwise set. ** ** * Page number of fts index leaf page. As a varint. ** ** * First docid on page indicated by previous field. As a varint. ** ** * A list of varints, one for each subsequent termless page. A ** positive delta if the termless page contains at least one docid, ** or an 0x00 byte otherwise. ** ** Internal doclist index nodes are: ** ** * Flags byte. Bits are: ** 0x01: Clear for root page, otherwise set. ** ** * Page number of first child page. As a varint. ** ** * Copy of first docid on page indicated by previous field. As a varint. ** ** * A list of delta-encoded varints - the first docid on each subsequent ** child page. ** */ /* ** Rowids for the averages and structure records in the %_data table. */ #define FTS5_AVERAGES_ROWID 1 /* Rowid used for the averages record */ #define FTS5_STRUCTURE_ROWID 10 /* The structure record */ /* ** Macros determining the rowids used by segment nodes. All nodes in all ** segments for all indexes (the regular FTS index and any prefix indexes) ** are stored in the %_data table with large positive rowids. ** ** The %_data table may contain up to (1<=? AND id<=?" */ int nRead; /* Total number of blocks read */ }; struct Fts5DoclistIter { u8 *a; int n; int i; /* Output variables. aPoslist==0 at EOF */ i64 iRowid; u8 *aPoslist; int nPoslist; }; /* ** Each iterator used by external modules is an instance of this type. */ struct Fts5IndexIter { Fts5Index *pIndex; Fts5Structure *pStruct; Fts5MultiSegIter *pMulti; Fts5Buffer poslist; /* Buffer containing current poslist */ }; /* ** The contents of the "structure" record for each index are represented ** using an Fts5Structure record in memory. Which uses instances of the ** other Fts5StructureXXX types as components. */ struct Fts5StructureSegment { int iSegid; /* Segment id */ int nHeight; /* Height of segment b-tree */ int pgnoFirst; /* First leaf page number in segment */ int pgnoLast; /* Last leaf page number in segment */ }; struct Fts5StructureLevel { int nMerge; /* Number of segments in incr-merge */ int nSeg; /* Total number of segments on level */ Fts5StructureSegment *aSeg; /* Array of segments. aSeg[0] is oldest. */ }; struct Fts5Structure { u64 nWriteCounter; /* Total leaves written to level 0 */ int nSegment; /* Total segments in this structure */ int nLevel; /* Number of levels in this index */ Fts5StructureLevel aLevel[0]; /* Array of nLevel level objects */ }; /* ** An object of type Fts5SegWriter is used to write to segments. */ struct Fts5PageWriter { int pgno; /* Page number for this page */ Fts5Buffer buf; /* Buffer containing page data */ Fts5Buffer term; /* Buffer containing previous term on page */ }; struct Fts5DlidxWriter { int pgno; /* Page number for this page */ int bPrevValid; /* True if iPrev is valid */ i64 iPrev; /* Previous docid value written to page */ Fts5Buffer buf; /* Buffer containing page data */ }; struct Fts5SegWriter { int iSegid; /* Segid to write to */ int nWriter; /* Number of entries in aWriter */ Fts5PageWriter *aWriter; /* Array of PageWriter objects */ i64 iPrevRowid; /* Previous docid written to current leaf */ u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */ u8 bFirstRowidInPage; /* True if next rowid is first in page */ u8 bFirstTermInPage; /* True if next term will be first in leaf */ int nLeafWritten; /* Number of leaf pages written */ int nEmpty; /* Number of contiguous term-less nodes */ int nDlidx; /* Allocated size of aDlidx[] array */ Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */ }; /* ** Object for iterating through the merged results of one or more segments, ** visiting each term/docid pair in the merged data. ** ** nSeg is always a power of two greater than or equal to the number of ** segments that this object is merging data from. Both the aSeg[] and ** aFirst[] arrays are sized at nSeg entries. The aSeg[] array is padded ** with zeroed objects - these are handled as if they were iterators opened ** on empty segments. ** ** The results of comparing segments aSeg[N] and aSeg[N+1], where N is an ** even number, is stored in aFirst[(nSeg+N)/2]. The "result" of the ** comparison in this context is the index of the iterator that currently ** points to the smaller term/rowid combination. Iterators at EOF are ** considered to be greater than all other iterators. ** ** aFirst[1] contains the index in aSeg[] of the iterator that points to ** the smallest key overall. aFirst[0] is unused. */ typedef struct Fts5CResult Fts5CResult; struct Fts5CResult { u16 iFirst; /* aSeg[] index of firstest iterator */ u8 bTermEq; /* True if the terms are equal */ }; struct Fts5MultiSegIter { int nSeg; /* Size of aSeg[] array */ int bRev; /* True to iterate in reverse order */ int bSkipEmpty; /* True to skip deleted entries */ Fts5SegIter *aSeg; /* Array of segment iterators */ Fts5CResult *aFirst; /* Current merge state (see above) */ }; /* ** Object for iterating through a single segment, visiting each term/docid ** pair in the segment. ** ** pSeg: ** The segment to iterate through. ** ** iLeafPgno: ** Current leaf page number within segment. ** ** iLeafOffset: ** Byte offset within the current leaf that is the first byte of the ** position list data (one byte passed the position-list size field). ** rowid field of the current entry. Usually this is the size field of the ** position list data. The exception is if the rowid for the current entry ** is the last thing on the leaf page. ** ** pLeaf: ** Buffer containing current leaf page data. Set to NULL at EOF. ** ** iTermLeafPgno, iTermLeafOffset: ** Leaf page number containing the last term read from the segment. And ** the offset immediately following the term data. ** ** flags: ** Mask of FTS5_SEGITER_XXX values. Interpreted as follows: ** ** FTS5_SEGITER_ONETERM: ** If set, set the iterator to point to EOF after the current doclist ** has been exhausted. Do not proceed to the next term in the segment. ** ** FTS5_SEGITER_REVERSE: ** This flag is only ever set if FTS5_SEGITER_ONETERM is also set. If ** it is set, iterate through docids in descending order instead of the ** default ascending order. ** ** iRowidOffset/nRowidOffset/aRowidOffset: ** These are used if the FTS5_SEGITER_REVERSE flag is set. ** ** For each rowid on the page corresponding to the current term, the ** corresponding aRowidOffset[] entry is set to the byte offset of the ** start of the "position-list-size" field within the page. */ struct Fts5SegIter { Fts5StructureSegment *pSeg; /* Segment to iterate through */ int flags; /* Mask of configuration flags */ int iLeafPgno; /* Current leaf page number */ Fts5Data *pLeaf; /* Current leaf data */ Fts5Data *pNextLeaf; /* Leaf page (iLeafPgno+1) */ int iLeafOffset; /* Byte offset within current leaf */ /* The page and offset from which the current term was read. The offset ** is the offset of the first rowid in the current doclist. */ int iTermLeafPgno; int iTermLeafOffset; /* The following are only used if the FTS5_SEGITER_REVERSE flag is set. */ int iRowidOffset; /* Current entry in aRowidOffset[] */ int nRowidOffset; /* Allocated size of aRowidOffset[] array */ int *aRowidOffset; /* Array of offset to rowid fields */ Fts5DlidxIter *pDlidx; /* If there is a doclist-index */ /* Variables populated based on current entry. */ Fts5Buffer term; /* Current term */ i64 iRowid; /* Current rowid */ int nPos; /* Number of bytes in current position list */ int bDel; /* True if the delete flag is set */ }; #define FTS5_SEGITER_ONETERM 0x01 #define FTS5_SEGITER_REVERSE 0x02 /* ** Object for iterating through the conents of a single internal node in ** memory. */ struct Fts5NodeIter { /* Internal. Set and managed by fts5NodeIterXXX() functions. Except, ** the EOF test for the iterator is (Fts5NodeIter.aData==0). */ const u8 *aData; int nData; int iOff; /* Output variables */ Fts5Buffer term; int nEmpty; int iChild; int bDlidx; }; /* ** An instance of the following type is used to iterate through the contents ** of a doclist-index record. ** ** pData: ** Record containing the doclist-index data. ** ** bEof: ** Set to true once iterator has reached EOF. ** ** iOff: ** Set to the current offset within record pData. */ struct Fts5DlidxLvl { Fts5Data *pData; /* Data for current page of this level */ int iOff; /* Current offset into pData */ int bEof; /* At EOF already */ int iFirstOff; /* Used by reverse iterators */ /* Output variables */ int iLeafPgno; /* Page number of current leaf page */ i64 iRowid; /* First rowid on leaf iLeafPgno */ }; struct Fts5DlidxIter { int nLvl; int iSegid; Fts5DlidxLvl aLvl[1]; }; /* ** An Fts5BtreeIter object is used to iterate through all entries in the ** b-tree hierarchy belonging to a single fts5 segment. In this case the ** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the ** b-tree hierarchy consists of the following: ** ** iLeaf: The page number of the leaf page the entry points to. ** ** term: A split-key that all terms on leaf page $iLeaf must be greater ** than or equal to. The "term" associated with the first b-tree ** hierarchy entry (the one that points to leaf page 1) is always ** an empty string. ** ** nEmpty: The number of empty (termless) leaf pages that immediately ** following iLeaf. ** ** The Fts5BtreeIter object is only used as part of the integrity-check code. */ struct Fts5BtreeIterLevel { Fts5NodeIter s; /* Iterator for the current node */ Fts5Data *pData; /* Data for the current node */ }; struct Fts5BtreeIter { Fts5Index *p; /* FTS5 backend object */ Fts5StructureSegment *pSeg; /* Iterate through this segment's b-tree */ int nLvl; /* Size of aLvl[] array */ Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */ /* Output variables */ Fts5Buffer term; /* Current term */ int iLeaf; /* Leaf containing terms >= current term */ int nEmpty; /* Number of "empty" leaves following iLeaf */ int bEof; /* Set to true at EOF */ int bDlidx; /* True if there exists a dlidx */ }; static void fts5PutU16(u8 *aOut, u16 iVal){ aOut[0] = (iVal>>8); aOut[1] = (iVal&0xFF); } static u16 fts5GetU16(const u8 *aIn){ return ((u16)aIn[0] << 8) + aIn[1]; } /* ** Allocate and return a buffer at least nByte bytes in size. ** ** If an OOM error is encountered, return NULL and set the error code in ** the Fts5Index handle passed as the first argument. */ static void *fts5IdxMalloc(Fts5Index *p, int nByte){ return sqlite3Fts5MallocZero(&p->rc, nByte); } /* ** Compare the contents of the pLeft buffer with the pRight/nRight blob. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ static int fts5BufferCompareBlob( Fts5Buffer *pLeft, /* Left hand side of comparison */ const u8 *pRight, int nRight /* Right hand side of comparison */ ){ int nCmp = MIN(pLeft->n, nRight); int res = memcmp(pLeft->p, pRight, nCmp); return (res==0 ? (pLeft->n - nRight) : res); } /* ** Compare the contents of the two buffers using memcmp(). If one buffer ** is a prefix of the other, it is considered the lesser. ** ** Return -ve if pLeft is smaller than pRight, 0 if they are equal or ** +ve if pRight is smaller than pLeft. In other words: ** ** res = *pLeft - *pRight */ static int fts5BufferCompare(Fts5Buffer *pLeft, Fts5Buffer *pRight){ int nCmp = MIN(pLeft->n, pRight->n); int res = memcmp(pLeft->p, pRight->p, nCmp); return (res==0 ? (pLeft->n - pRight->n) : res); } #ifdef SQLITE_DEBUG static int fts5BlobCompare( const u8 *pLeft, int nLeft, const u8 *pRight, int nRight ){ int nCmp = MIN(nLeft, nRight); int res = memcmp(pLeft, pRight, nCmp); return (res==0 ? (nLeft - nRight) : res); } #endif /* ** Close the read-only blob handle, if it is open. */ static void fts5CloseReader(Fts5Index *p){ if( p->pReader ){ sqlite3_blob *pReader = p->pReader; p->pReader = 0; sqlite3_blob_close(pReader); } } static Fts5Data *fts5DataReadOrBuffer( Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid ){ Fts5Data *pRet = 0; if( p->rc==SQLITE_OK ){ int rc = SQLITE_OK; if( p->pReader ){ /* This call may return SQLITE_ABORT if there has been a savepoint ** rollback since it was last used. In this case a new blob handle ** is required. */ sqlite3_blob *pBlob = p->pReader; p->pReader = 0; rc = sqlite3_blob_reopen(pBlob, iRowid); assert( p->pReader==0 ); p->pReader = pBlob; if( rc!=SQLITE_OK ){ fts5CloseReader(p); } if( rc==SQLITE_ABORT ) rc = SQLITE_OK; } /* If the blob handle is not yet open, open and seek it. Otherwise, use ** the blob_reopen() API to reseek the existing blob handle. */ if( p->pReader==0 && rc==SQLITE_OK ){ Fts5Config *pConfig = p->pConfig; rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", iRowid, 0, &p->pReader ); } /* If either of the sqlite3_blob_open() or sqlite3_blob_reopen() calls ** above returned SQLITE_ERROR, return SQLITE_CORRUPT_VTAB instead. ** All the reasons those functions might return SQLITE_ERROR - missing ** table, missing row, non-blob/text in block column - indicate ** backing store corruption. */ if( rc==SQLITE_ERROR ) rc = FTS5_CORRUPT; if( rc==SQLITE_OK ){ u8 *aOut; /* Read blob data into this buffer */ int nByte = sqlite3_blob_bytes(p->pReader); if( pBuf ){ fts5BufferZero(pBuf); fts5BufferGrow(&rc, pBuf, nByte); aOut = pBuf->p; pBuf->n = nByte; }else{ int nSpace = nByte + FTS5_DATA_ZERO_PADDING; pRet = (Fts5Data*)sqlite3Fts5MallocZero(&rc, nSpace+sizeof(Fts5Data)); if( pRet ){ pRet->n = nByte; aOut = pRet->p = (u8*)&pRet[1]; } } if( rc==SQLITE_OK ){ rc = sqlite3_blob_read(p->pReader, aOut, nByte, 0); } if( rc!=SQLITE_OK ){ sqlite3_free(pRet); pRet = 0; } } p->rc = rc; p->nRead++; } return pRet; } /* ** Retrieve a record from the %_data table. ** ** If an error occurs, NULL is returned and an error left in the ** Fts5Index object. */ static Fts5Data *fts5DataRead(Fts5Index *p, i64 iRowid){ Fts5Data *pRet = fts5DataReadOrBuffer(p, 0, iRowid); assert( (pRet==0)==(p->rc!=SQLITE_OK) ); return pRet; } /* ** Read a record from the %_data table into the buffer supplied as the ** second argument. ** ** If an error occurs, an error is left in the Fts5Index object. If an ** error has already occurred when this function is called, it is a ** no-op. */ static void fts5DataBuffer(Fts5Index *p, Fts5Buffer *pBuf, i64 iRowid){ (void)fts5DataReadOrBuffer(p, pBuf, iRowid); } /* ** Release a reference to data record returned by an earlier call to ** fts5DataRead(). */ static void fts5DataRelease(Fts5Data *pData){ sqlite3_free(pData); } /* ** INSERT OR REPLACE a record into the %_data table. */ static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){ if( p->rc!=SQLITE_OK ) return; if( p->pWriter==0 ){ int rc = SQLITE_OK; Fts5Config *pConfig = p->pConfig; char *zSql = sqlite3Fts5Mprintf(&rc, "REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl ); if( zSql ){ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0); sqlite3_free(zSql); } if( rc!=SQLITE_OK ){ p->rc = rc; return; } } sqlite3_bind_int64(p->pWriter, 1, iRowid); sqlite3_bind_blob(p->pWriter, 2, pData, nData, SQLITE_STATIC); sqlite3_step(p->pWriter); p->rc = sqlite3_reset(p->pWriter); } /* ** Execute the following SQL: ** ** DELETE FROM %_data WHERE id BETWEEN $iFirst AND $iLast */ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){ if( p->rc!=SQLITE_OK ) return; if( p->pDeleter==0 ){ int rc; Fts5Config *pConfig = p->pConfig; char *zSql = sqlite3_mprintf( "DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl ); if( zSql==0 ){ rc = SQLITE_NOMEM; }else{ rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pDeleter, 0); sqlite3_free(zSql); } if( rc!=SQLITE_OK ){ p->rc = rc; return; } } sqlite3_bind_int64(p->pDeleter, 1, iFirst); sqlite3_bind_int64(p->pDeleter, 2, iLast); sqlite3_step(p->pDeleter); p->rc = sqlite3_reset(p->pDeleter); } /* ** Remove all records associated with segment iSegid. */ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){ i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0); i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1; fts5DataDelete(p, iFirst, iLast); } /* ** Release a reference to an Fts5Structure object returned by an earlier ** call to fts5StructureRead() or fts5StructureDecode(). */ static void fts5StructureRelease(Fts5Structure *pStruct){ if( pStruct ){ int i; for(i=0; inLevel; i++){ sqlite3_free(pStruct->aLevel[i].aSeg); } sqlite3_free(pStruct); } } /* ** Deserialize and return the structure record currently stored in serialized ** form within buffer pData/nData. ** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array ** are over-allocated by one slot. This allows the structure contents ** to be more easily edited. ** ** If an error occurs, *ppOut is set to NULL and an SQLite error code ** returned. Otherwise, *ppOut is set to point to the new object and ** SQLITE_OK returned. */ static int fts5StructureDecode( const u8 *pData, /* Buffer containing serialized structure */ int nData, /* Size of buffer pData in bytes */ int *piCookie, /* Configuration cookie value */ Fts5Structure **ppOut /* OUT: Deserialized object */ ){ int rc = SQLITE_OK; int i = 0; int iLvl; int nLevel = 0; int nSegment = 0; int nByte; /* Bytes of space to allocate at pRet */ Fts5Structure *pRet = 0; /* Structure object to return */ /* Grab the cookie value */ if( piCookie ) *piCookie = sqlite3Fts5Get32(pData); i = 4; /* Read the total number of levels and segments from the start of the ** structure record. */ i += fts5GetVarint32(&pData[i], nLevel); i += fts5GetVarint32(&pData[i], nSegment); nByte = ( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel) /* aLevel[] array */ ); pRet = (Fts5Structure*)sqlite3Fts5MallocZero(&rc, nByte); if( pRet ){ pRet->nLevel = nLevel; pRet->nSegment = nSegment; i += sqlite3Fts5GetVarint(&pData[i], &pRet->nWriteCounter); for(iLvl=0; rc==SQLITE_OK && iLvlaLevel[iLvl]; int nTotal; int iSeg; i += fts5GetVarint32(&pData[i], pLvl->nMerge); i += fts5GetVarint32(&pData[i], nTotal); assert( nTotal>=pLvl->nMerge ); pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&rc, nTotal * sizeof(Fts5StructureSegment) ); if( rc==SQLITE_OK ){ pLvl->nSeg = nTotal; for(iSeg=0; iSegaSeg[iSeg].iSegid); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].nHeight); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoFirst); i += fts5GetVarint32(&pData[i], pLvl->aSeg[iSeg].pgnoLast); } }else{ fts5StructureRelease(pRet); pRet = 0; } } } *ppOut = pRet; return rc; } /* ** */ static void fts5StructureAddLevel(int *pRc, Fts5Structure **ppStruct){ if( *pRc==SQLITE_OK ){ Fts5Structure *pStruct = *ppStruct; int nLevel = pStruct->nLevel; int nByte = ( sizeof(Fts5Structure) + /* Main structure */ sizeof(Fts5StructureLevel) * (nLevel+1) /* aLevel[] array */ ); pStruct = sqlite3_realloc(pStruct, nByte); if( pStruct ){ memset(&pStruct->aLevel[nLevel], 0, sizeof(Fts5StructureLevel)); pStruct->nLevel++; *ppStruct = pStruct; }else{ *pRc = SQLITE_NOMEM; } } } /* ** Extend level iLvl so that there is room for at least nExtra more ** segments. */ static void fts5StructureExtendLevel( int *pRc, Fts5Structure *pStruct, int iLvl, int nExtra, int bInsert ){ if( *pRc==SQLITE_OK ){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureSegment *aNew; int nByte; nByte = (pLvl->nSeg + nExtra) * sizeof(Fts5StructureSegment); aNew = sqlite3_realloc(pLvl->aSeg, nByte); if( aNew ){ if( bInsert==0 ){ memset(&aNew[pLvl->nSeg], 0, sizeof(Fts5StructureSegment) * nExtra); }else{ int nMove = pLvl->nSeg * sizeof(Fts5StructureSegment); memmove(&aNew[nExtra], aNew, nMove); memset(aNew, 0, sizeof(Fts5StructureSegment) * nExtra); } pLvl->aSeg = aNew; }else{ *pRc = SQLITE_NOMEM; } } } /* ** Read, deserialize and return the structure record. ** ** The Fts5Structure.aLevel[] and each Fts5StructureLevel.aSeg[] array ** are over-allocated as described for function fts5StructureDecode() ** above. ** ** If an error occurs, NULL is returned and an error code left in the ** Fts5Index handle. If an error has already occurred when this function ** is called, it is a no-op. */ static Fts5Structure *fts5StructureRead(Fts5Index *p){ Fts5Config *pConfig = p->pConfig; Fts5Structure *pRet = 0; /* Object to return */ Fts5Data *pData; /* %_data entry containing structure record */ int iCookie; /* Configuration cookie */ pData = fts5DataRead(p, FTS5_STRUCTURE_ROWID); if( !pData ) return 0; p->rc = fts5StructureDecode(pData->p, pData->n, &iCookie, &pRet); if( p->rc==SQLITE_OK && pConfig->iCookie!=iCookie ){ p->rc = sqlite3Fts5ConfigLoad(pConfig, iCookie); } fts5DataRelease(pData); if( p->rc!=SQLITE_OK ){ fts5StructureRelease(pRet); pRet = 0; } return pRet; } /* ** Return the total number of segments in index structure pStruct. This ** function is only ever used as part of assert() conditions. */ #ifdef SQLITE_DEBUG static int fts5StructureCountSegments(Fts5Structure *pStruct){ int nSegment = 0; /* Total number of segments */ if( pStruct ){ int iLvl; /* Used to iterate through levels */ for(iLvl=0; iLvlnLevel; iLvl++){ nSegment += pStruct->aLevel[iLvl].nSeg; } } return nSegment; } #endif /* ** Serialize and store the "structure" record. ** ** If an error occurs, leave an error code in the Fts5Index object. If an ** error has already occurred, this function is a no-op. */ static void fts5StructureWrite(Fts5Index *p, Fts5Structure *pStruct){ if( p->rc==SQLITE_OK ){ Fts5Buffer buf; /* Buffer to serialize record into */ int iLvl; /* Used to iterate through levels */ int iCookie; /* Cookie value to store */ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); memset(&buf, 0, sizeof(Fts5Buffer)); /* Append the current configuration cookie */ iCookie = p->pConfig->iCookie; if( iCookie<0 ) iCookie = 0; fts5BufferAppend32(&p->rc, &buf, iCookie); fts5BufferAppendVarint(&p->rc, &buf, pStruct->nLevel); fts5BufferAppendVarint(&p->rc, &buf, pStruct->nSegment); fts5BufferAppendVarint(&p->rc, &buf, (i64)pStruct->nWriteCounter); for(iLvl=0; iLvlnLevel; iLvl++){ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; fts5BufferAppendVarint(&p->rc, &buf, pLvl->nMerge); fts5BufferAppendVarint(&p->rc, &buf, pLvl->nSeg); assert( pLvl->nMerge<=pLvl->nSeg ); for(iSeg=0; iSegnSeg; iSeg++){ fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].iSegid); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].nHeight); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoFirst); fts5BufferAppendVarint(&p->rc, &buf, pLvl->aSeg[iSeg].pgnoLast); } } fts5DataWrite(p, FTS5_STRUCTURE_ROWID, buf.p, buf.n); fts5BufferFree(&buf); } } #if 0 static void fts5DebugStructure(int*,Fts5Buffer*,Fts5Structure*); static void fts5PrintStructure(const char *zCaption, Fts5Structure *pStruct){ int rc = SQLITE_OK; Fts5Buffer buf; memset(&buf, 0, sizeof(buf)); fts5DebugStructure(&rc, &buf, pStruct); fprintf(stdout, "%s: %s\n", zCaption, buf.p); fflush(stdout); fts5BufferFree(&buf); } #else # define fts5PrintStructure(x,y) #endif static int fts5SegmentSize(Fts5StructureSegment *pSeg){ return 1 + pSeg->pgnoLast - pSeg->pgnoFirst; } /* ** Return a copy of index structure pStruct. Except, promote as many ** segments as possible to level iPromote. If an OOM occurs, NULL is ** returned. */ static void fts5StructurePromoteTo( Fts5Index *p, int iPromote, int szPromote, Fts5Structure *pStruct ){ int il, is; Fts5StructureLevel *pOut = &pStruct->aLevel[iPromote]; if( pOut->nMerge==0 ){ for(il=iPromote+1; ilnLevel; il++){ Fts5StructureLevel *pLvl = &pStruct->aLevel[il]; if( pLvl->nMerge ) return; for(is=pLvl->nSeg-1; is>=0; is--){ int sz = fts5SegmentSize(&pLvl->aSeg[is]); if( sz>szPromote ) return; fts5StructureExtendLevel(&p->rc, pStruct, iPromote, 1, 1); if( p->rc ) return; memcpy(pOut->aSeg, &pLvl->aSeg[is], sizeof(Fts5StructureSegment)); pOut->nSeg++; pLvl->nSeg--; } } } } /* ** A new segment has just been written to level iLvl of index structure ** pStruct. This function determines if any segments should be promoted ** as a result. Segments are promoted in two scenarios: ** ** a) If the segment just written is smaller than one or more segments ** within the previous populated level, it is promoted to the previous ** populated level. ** ** b) If the segment just written is larger than the newest segment on ** the next populated level, then that segment, and any other adjacent ** segments that are also smaller than the one just written, are ** promoted. ** ** If one or more segments are promoted, the structure object is updated ** to reflect this. */ static void fts5StructurePromote( Fts5Index *p, /* FTS5 backend object */ int iLvl, /* Index level just updated */ Fts5Structure *pStruct /* Index structure */ ){ if( p->rc==SQLITE_OK ){ int iTst; int iPromote = -1; int szPromote; /* Promote anything this size or smaller */ Fts5StructureSegment *pSeg; /* Segment just written */ int szSeg; /* Size of segment just written */ pSeg = &pStruct->aLevel[iLvl].aSeg[pStruct->aLevel[iLvl].nSeg-1]; szSeg = (1 + pSeg->pgnoLast - pSeg->pgnoFirst); /* Check for condition (a) */ for(iTst=iLvl-1; iTst>=0 && pStruct->aLevel[iTst].nSeg==0; iTst--); if( iTst>=0 ){ int i; int szMax = 0; Fts5StructureLevel *pTst = &pStruct->aLevel[iTst]; assert( pTst->nMerge==0 ); for(i=0; inSeg; i++){ int sz = pTst->aSeg[i].pgnoLast - pTst->aSeg[i].pgnoFirst + 1; if( sz>szMax ) szMax = sz; } if( szMax>=szSeg ){ /* Condition (a) is true. Promote the newest segment on level ** iLvl to level iTst. */ iPromote = iTst; szPromote = szMax; } } /* If condition (a) is not met, assume (b) is true. StructurePromoteTo() ** is a no-op if it is not. */ if( iPromote<0 ){ iPromote = iLvl; szPromote = szSeg; } fts5StructurePromoteTo(p, iPromote, szPromote, pStruct); } } /* ** If the pIter->iOff offset currently points to an entry indicating one ** or more term-less nodes, advance past it and set pIter->nEmpty to ** the number of empty child nodes. */ static void fts5NodeIterGobbleNEmpty(Fts5NodeIter *pIter){ if( pIter->iOffnData && 0==(pIter->aData[pIter->iOff] & 0xfe) ){ pIter->bDlidx = pIter->aData[pIter->iOff] & 0x01; pIter->iOff++; pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], pIter->nEmpty); }else{ pIter->nEmpty = 0; pIter->bDlidx = 0; } } /* ** Advance to the next entry within the node. */ static void fts5NodeIterNext(int *pRc, Fts5NodeIter *pIter){ if( pIter->iOff>=pIter->nData ){ pIter->aData = 0; pIter->iChild += pIter->nEmpty; }else{ int nPre, nNew; pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nPre); pIter->iOff += fts5GetVarint32(&pIter->aData[pIter->iOff], nNew); pIter->term.n = nPre-2; fts5BufferAppendBlob(pRc, &pIter->term, nNew, pIter->aData+pIter->iOff); pIter->iOff += nNew; pIter->iChild += (1 + pIter->nEmpty); fts5NodeIterGobbleNEmpty(pIter); if( *pRc ) pIter->aData = 0; } } /* ** Initialize the iterator object pIter to iterate through the internal ** segment node in pData. */ static void fts5NodeIterInit(const u8 *aData, int nData, Fts5NodeIter *pIter){ memset(pIter, 0, sizeof(*pIter)); pIter->aData = aData; pIter->nData = nData; pIter->iOff = fts5GetVarint32(aData, pIter->iChild); fts5NodeIterGobbleNEmpty(pIter); } /* ** Free any memory allocated by the iterator object. */ static void fts5NodeIterFree(Fts5NodeIter *pIter){ fts5BufferFree(&pIter->term); } /* ** Advance the iterator passed as the only argument. If the end of the ** doclist-index page is reached, return non-zero. */ static int fts5DlidxLvlNext(Fts5DlidxLvl *pLvl){ Fts5Data *pData = pLvl->pData; if( pLvl->iOff==0 ){ assert( pLvl->bEof==0 ); pLvl->iOff = 1; pLvl->iOff += fts5GetVarint32(&pData->p[1], pLvl->iLeafPgno); pLvl->iOff += fts5GetVarint(&pData->p[pLvl->iOff], (u64*)&pLvl->iRowid); pLvl->iFirstOff = pLvl->iOff; }else{ int iOff; for(iOff=pLvl->iOff; iOffn; iOff++){ if( pData->p[iOff] ) break; } if( iOffn ){ i64 iVal; pLvl->iLeafPgno += (iOff - pLvl->iOff) + 1; iOff += fts5GetVarint(&pData->p[iOff], (u64*)&iVal); pLvl->iRowid += iVal; pLvl->iOff = iOff; }else{ pLvl->bEof = 1; } } return pLvl->bEof; } /* ** Advance the iterator passed as the only argument. */ static int fts5DlidxIterNextR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; assert( iLvlnLvl ); if( fts5DlidxLvlNext(pLvl) ){ if( (iLvl+1) < pIter->nLvl ){ fts5DlidxIterNextR(p, pIter, iLvl+1); if( pLvl[1].bEof==0 ){ fts5DataRelease(pLvl->pData); memset(pLvl, 0, sizeof(Fts5DlidxLvl)); pLvl->pData = fts5DataRead(p, FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) ); if( pLvl->pData ) fts5DlidxLvlNext(pLvl); } } } return pIter->aLvl[0].bEof; } static int fts5DlidxIterNext(Fts5Index *p, Fts5DlidxIter *pIter){ return fts5DlidxIterNextR(p, pIter, 0); } /* ** The iterator passed as the first argument has the following fields set ** as follows. This function sets up the rest of the iterator so that it ** points to the first rowid in the doclist-index. ** ** pData: ** pointer to doclist-index record, ** ** When this function is called pIter->iLeafPgno is the page number the ** doclist is associated with (the one featuring the term). */ static int fts5DlidxIterFirst(Fts5DlidxIter *pIter){ int i; for(i=0; inLvl; i++){ fts5DlidxLvlNext(&pIter->aLvl[i]); } return pIter->aLvl[0].bEof; } static int fts5DlidxIterEof(Fts5Index *p, Fts5DlidxIter *pIter){ return p->rc!=SQLITE_OK || pIter->aLvl[0].bEof; } static void fts5DlidxIterLast(Fts5Index *p, Fts5DlidxIter *pIter){ int i; /* Advance each level to the last entry on the last page */ for(i=pIter->nLvl-1; p->rc==SQLITE_OK && i>=0; i--){ Fts5DlidxLvl *pLvl = &pIter->aLvl[i]; while( fts5DlidxLvlNext(pLvl)==0 ); pLvl->bEof = 0; if( i>0 ){ Fts5DlidxLvl *pChild = &pLvl[-1]; fts5DataRelease(pChild->pData); memset(pChild, 0, sizeof(Fts5DlidxLvl)); pChild->pData = fts5DataRead(p, FTS5_DLIDX_ROWID(pIter->iSegid, i-1, pLvl->iLeafPgno) ); } } } /* ** Move the iterator passed as the only argument to the previous entry. */ static int fts5DlidxLvlPrev(Fts5DlidxLvl *pLvl){ int iOff = pLvl->iOff; assert( pLvl->bEof==0 ); if( iOff<=pLvl->iFirstOff ){ pLvl->bEof = 1; }else{ u8 *a = pLvl->pData->p; i64 iVal; int iLimit; int ii; int nZero = 0; /* Currently iOff points to the first byte of a varint. This block ** decrements iOff until it points to the first byte of the previous ** varint. Taking care not to read any memory locations that occur ** before the buffer in memory. */ iLimit = (iOff>9 ? iOff-9 : 0); for(iOff--; iOff>iLimit; iOff--){ if( (a[iOff-1] & 0x80)==0 ) break; } fts5GetVarint(&a[iOff], (u64*)&iVal); pLvl->iRowid -= iVal; pLvl->iLeafPgno--; /* Skip backwards past any 0x00 varints. */ for(ii=iOff-1; ii>=pLvl->iFirstOff && a[ii]==0x00; ii--){ nZero++; } if( ii>=pLvl->iFirstOff && (a[ii] & 0x80) ){ /* The byte immediately before the last 0x00 byte has the 0x80 bit ** set. So the last 0x00 is only a varint 0 if there are 8 more 0x80 ** bytes before a[ii]. */ int bZero = 0; /* True if last 0x00 counts */ if( (ii-8)>=pLvl->iFirstOff ){ int j; for(j=1; j<=8 && (a[ii-j] & 0x80); j++); bZero = (j>8); } if( bZero==0 ) nZero--; } pLvl->iLeafPgno -= nZero; pLvl->iOff = iOff - nZero; } return pLvl->bEof; } static int fts5DlidxIterPrevR(Fts5Index *p, Fts5DlidxIter *pIter, int iLvl){ Fts5DlidxLvl *pLvl = &pIter->aLvl[iLvl]; assert( iLvlnLvl ); if( fts5DlidxLvlPrev(pLvl) ){ if( (iLvl+1) < pIter->nLvl ){ fts5DlidxIterPrevR(p, pIter, iLvl+1); if( pLvl[1].bEof==0 ){ fts5DataRelease(pLvl->pData); memset(pLvl, 0, sizeof(Fts5DlidxLvl)); pLvl->pData = fts5DataRead(p, FTS5_DLIDX_ROWID(pIter->iSegid, iLvl, pLvl[1].iLeafPgno) ); if( pLvl->pData ){ while( fts5DlidxLvlNext(pLvl)==0 ); pLvl->bEof = 0; } } } } return pIter->aLvl[0].bEof; } static int fts5DlidxIterPrev(Fts5Index *p, Fts5DlidxIter *pIter){ return fts5DlidxIterPrevR(p, pIter, 0); } /* ** Free a doclist-index iterator object allocated by fts5DlidxIterInit(). */ static void fts5DlidxIterFree(Fts5DlidxIter *pIter){ if( pIter ){ int i; for(i=0; inLvl; i++){ fts5DataRelease(pIter->aLvl[i].pData); } sqlite3_free(pIter); } } static Fts5DlidxIter *fts5DlidxIterInit( Fts5Index *p, /* Fts5 Backend to iterate within */ int bRev, /* True for ORDER BY ASC */ int iSegid, /* Segment id */ int iLeafPg /* Leaf page number to load dlidx for */ ){ Fts5DlidxIter *pIter = 0; int i; int bDone = 0; for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ int nByte = sizeof(Fts5DlidxIter) + i * sizeof(Fts5DlidxLvl); Fts5DlidxIter *pNew; pNew = (Fts5DlidxIter*)sqlite3_realloc(pIter, nByte); if( pNew==0 ){ p->rc = SQLITE_NOMEM; }else{ i64 iRowid = FTS5_DLIDX_ROWID(iSegid, i, iLeafPg); Fts5DlidxLvl *pLvl = &pNew->aLvl[i]; pIter = pNew; memset(pLvl, 0, sizeof(Fts5DlidxLvl)); pLvl->pData = fts5DataRead(p, iRowid); if( pLvl->pData && (pLvl->pData->p[0] & 0x0001)==0 ){ bDone = 1; } pIter->nLvl = i+1; } } if( p->rc==SQLITE_OK ){ pIter->iSegid = iSegid; if( bRev==0 ){ fts5DlidxIterFirst(pIter); }else{ fts5DlidxIterLast(p, pIter); } } if( p->rc!=SQLITE_OK ){ fts5DlidxIterFree(pIter); pIter = 0; } return pIter; } static i64 fts5DlidxIterRowid(Fts5DlidxIter *pIter){ return pIter->aLvl[0].iRowid; } static int fts5DlidxIterPgno(Fts5DlidxIter *pIter){ return pIter->aLvl[0].iLeafPgno; } static void fts5LeafHeader(Fts5Data *pLeaf, int *piRowid, int *piTerm){ *piRowid = (int)fts5GetU16(&pLeaf->p[0]); *piTerm = (int)fts5GetU16(&pLeaf->p[2]); } /* ** Load the next leaf page into the segment iterator. */ static void fts5SegIterNextPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter /* Iterator to advance to next page */ ){ Fts5StructureSegment *pSeg = pIter->pSeg; fts5DataRelease(pIter->pLeaf); pIter->iLeafPgno++; if( pIter->pNextLeaf ){ assert( pIter->iLeafPgno<=pSeg->pgnoLast ); pIter->pLeaf = pIter->pNextLeaf; pIter->pNextLeaf = 0; }else if( pIter->iLeafPgno<=pSeg->pgnoLast ){ pIter->pLeaf = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pIter->iLeafPgno) ); }else{ pIter->pLeaf = 0; } } /* ** Argument p points to a buffer containing a varint to be interpreted as a ** position list size field. Read the varint and return the number of bytes ** read. Before returning, set *pnSz to the number of bytes in the position ** list, and *pbDel to true if the delete flag is set, or false otherwise. */ static int fts5GetPoslistSize(const u8 *p, int *pnSz, int *pbDel){ int nSz; int n = fts5GetVarint32(p, nSz); assert_nc( nSz>=0 ); *pnSz = nSz/2; *pbDel = nSz & 0x0001; return n; } /* ** Fts5SegIter.iLeafOffset currently points to the first byte of a ** position-list size field. Read the value of the field and store it ** in the following variables: ** ** Fts5SegIter.nPos ** Fts5SegIter.bDel ** ** Leave Fts5SegIter.iLeafOffset pointing to the first byte of the ** position list content (if any). */ static void fts5SegIterLoadNPos(Fts5Index *p, Fts5SegIter *pIter){ if( p->rc==SQLITE_OK ){ int iOff = pIter->iLeafOffset; /* Offset to read at */ if( iOff>=pIter->pLeaf->n ){ p->rc = FTS5_CORRUPT; }else{ const u8 *a = &pIter->pLeaf->p[iOff]; pIter->iLeafOffset += fts5GetPoslistSize(a, &pIter->nPos, &pIter->bDel); } } } /* ** Fts5SegIter.iLeafOffset currently points to the first byte of the ** "nSuffix" field of a term. Function parameter nKeep contains the value ** of the "nPrefix" field (if there was one - it is passed 0 if this is ** the first term in the segment). ** ** This function populates: ** ** Fts5SegIter.term ** Fts5SegIter.rowid ** ** accordingly and leaves (Fts5SegIter.iLeafOffset) set to the content of ** the first position list. The position list belonging to document ** (Fts5SegIter.iRowid). */ static void fts5SegIterLoadTerm(Fts5Index *p, Fts5SegIter *pIter, int nKeep){ u8 *a = pIter->pLeaf->p; /* Buffer to read data from */ int iOff = pIter->iLeafOffset; /* Offset to read at */ int nNew; /* Bytes of new data */ iOff += fts5GetVarint32(&a[iOff], nNew); pIter->term.n = nKeep; fts5BufferAppendBlob(&p->rc, &pIter->term, nNew, &a[iOff]); iOff += nNew; pIter->iTermLeafOffset = iOff; pIter->iTermLeafPgno = pIter->iLeafPgno; if( iOff>=pIter->pLeaf->n ){ fts5SegIterNextPage(p, pIter); if( pIter->pLeaf==0 ){ if( p->rc==SQLITE_OK ) p->rc = FTS5_CORRUPT; return; } iOff = 4; a = pIter->pLeaf->p; } iOff += sqlite3Fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } /* ** Initialize the iterator object pIter to iterate through the entries in ** segment pSeg. The iterator is left pointing to the first entry when ** this function returns. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterInit( Fts5Index *p, /* FTS index object */ Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ if( pSeg->pgnoFirst==0 ){ /* This happens if the segment is being used as an input to an incremental ** merge and all data has already been "trimmed". See function ** fts5TrimSegments() for details. In this case leave the iterator empty. ** The caller will see the (pIter->pLeaf==0) and assume the iterator is ** at EOF already. */ assert( pIter->pLeaf==0 ); return; } if( p->rc==SQLITE_OK ){ memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; pIter->iLeafPgno = pSeg->pgnoFirst-1; fts5SegIterNextPage(p, pIter); } if( p->rc==SQLITE_OK ){ u8 *a = pIter->pLeaf->p; pIter->iLeafOffset = fts5GetU16(&a[2]); fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); } } /* ** This function is only ever called on iterators created by calls to ** Fts5IndexQuery() with the FTS5INDEX_QUERY_DESC flag set. ** ** The iterator is in an unusual state when this function is called: the ** Fts5SegIter.iLeafOffset variable is set to the offset of the start of ** the position-list size field for the first relevant rowid on the page. ** Fts5SegIter.rowid is set, but nPos and bDel are not. ** ** This function advances the iterator so that it points to the last ** relevant rowid on the page and, if necessary, initializes the ** aRowidOffset[] and iRowidOffset variables. At this point the iterator ** is in its regular state - Fts5SegIter.iLeafOffset points to the first ** byte of the position list content associated with said rowid. */ static void fts5SegIterReverseInitPage(Fts5Index *p, Fts5SegIter *pIter){ int n = pIter->pLeaf->n; int i = pIter->iLeafOffset; u8 *a = pIter->pLeaf->p; int iRowidOffset = 0; while( 1 ){ i64 iDelta = 0; int nPos; int bDummy; i += fts5GetPoslistSize(&a[i], &nPos, &bDummy); i += nPos; if( i>=n ) break; i += fts5GetVarint(&a[i], (u64*)&iDelta); if( iDelta==0 ) break; pIter->iRowid += iDelta; if( iRowidOffset>=pIter->nRowidOffset ){ int nNew = pIter->nRowidOffset + 8; int *aNew = (int*)sqlite3_realloc(pIter->aRowidOffset, nNew*sizeof(int)); if( aNew==0 ){ p->rc = SQLITE_NOMEM; break; } pIter->aRowidOffset = aNew; pIter->nRowidOffset = nNew; } pIter->aRowidOffset[iRowidOffset++] = pIter->iLeafOffset; pIter->iLeafOffset = i; } pIter->iRowidOffset = iRowidOffset; fts5SegIterLoadNPos(p, pIter); } /* ** */ static void fts5SegIterReverseNewPage(Fts5Index *p, Fts5SegIter *pIter){ assert( pIter->flags & FTS5_SEGITER_REVERSE ); assert( pIter->flags & FTS5_SEGITER_ONETERM ); fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; while( p->rc==SQLITE_OK && pIter->iLeafPgno>pIter->iTermLeafPgno ){ Fts5Data *pNew; pIter->iLeafPgno--; pNew = fts5DataRead(p, FTS5_SEGMENT_ROWID( pIter->pSeg->iSegid, 0, pIter->iLeafPgno )); if( pNew ){ if( pIter->iLeafPgno==pIter->iTermLeafPgno ){ if( pIter->iTermLeafOffsetn ){ pIter->pLeaf = pNew; pIter->iLeafOffset = pIter->iTermLeafOffset; } }else{ int iRowidOff, dummy; fts5LeafHeader(pNew, &iRowidOff, &dummy); if( iRowidOff ){ pIter->pLeaf = pNew; pIter->iLeafOffset = iRowidOff; } } if( pIter->pLeaf ){ u8 *a = &pIter->pLeaf->p[pIter->iLeafOffset]; pIter->iLeafOffset += fts5GetVarint(a, (u64*)&pIter->iRowid); break; }else{ fts5DataRelease(pNew); } } } if( pIter->pLeaf ){ fts5SegIterReverseInitPage(p, pIter); } } /* ** Return true if the iterator passed as the second argument currently ** points to a delete marker. A delete marker is an entry with a 0 byte ** position-list. */ static int fts5MultiIterIsEmpty(Fts5Index *p, Fts5MultiSegIter *pIter){ Fts5SegIter *pSeg = &pIter->aSeg[pIter->aFirst[1].iFirst]; return (p->rc==SQLITE_OK && pSeg->pLeaf && pSeg->nPos==0); } /* ** Advance iterator pIter to the next entry. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. It ** is not considered an error if the iterator reaches EOF. If an error has ** already occurred when this function is called, it is a no-op. */ static void fts5SegIterNext( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter, /* Iterator to advance */ int *pbNewTerm /* OUT: Set for new term */ ){ assert( pbNewTerm==0 || *pbNewTerm==0 ); if( p->rc==SQLITE_OK ){ if( pIter->flags & FTS5_SEGITER_REVERSE ){ assert( pIter->pNextLeaf==0 ); if( pIter->iRowidOffset>0 ){ u8 *a = pIter->pLeaf->p; int iOff; int nPos; int bDummy; i64 iDelta; pIter->iRowidOffset--; pIter->iLeafOffset = iOff = pIter->aRowidOffset[pIter->iRowidOffset]; iOff += fts5GetPoslistSize(&a[iOff], &nPos, &bDummy); iOff += nPos; fts5GetVarint(&a[iOff], (u64*)&iDelta); pIter->iRowid -= iDelta; fts5SegIterLoadNPos(p, pIter); }else{ fts5SegIterReverseNewPage(p, pIter); } }else{ Fts5Data *pLeaf = pIter->pLeaf; int iOff; int bNewTerm = 0; int nKeep = 0; /* Search for the end of the position list within the current page. */ u8 *a = pLeaf->p; int n = pLeaf->n; iOff = pIter->iLeafOffset + pIter->nPos; if( iOffiLeafOffset = iOff; if( iDelta==0 ){ bNewTerm = 1; if( iOff>=n ){ fts5SegIterNextPage(p, pIter); pIter->iLeafOffset = 4; }else if( iOff!=fts5GetU16(&a[2]) ){ pIter->iLeafOffset += fts5GetVarint32(&a[iOff], nKeep); } }else{ pIter->iRowid += iDelta; } }else if( pIter->pSeg==0 ){ const u8 *pList = 0; const char *zTerm; int nList; if( 0==(pIter->flags & FTS5_SEGITER_ONETERM) ){ sqlite3Fts5HashScanNext(p->pHash); sqlite3Fts5HashScanEntry(p->pHash, &zTerm, &pList, &nList); } if( pList==0 ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ pIter->pLeaf->p = (u8*)pList; pIter->pLeaf->n = nList; sqlite3Fts5BufferSet(&p->rc, &pIter->term, strlen(zTerm), (u8*)zTerm); pIter->iLeafOffset = fts5GetVarint(pList, (u64*)&pIter->iRowid); } }else{ iOff = 0; /* Next entry is not on the current page */ while( iOff==0 ){ fts5SegIterNextPage(p, pIter); pLeaf = pIter->pLeaf; if( pLeaf==0 ) break; if( (iOff = fts5GetU16(&pLeaf->p[0])) && iOffn ){ iOff += sqlite3Fts5GetVarint(&pLeaf->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } else if( (iOff = fts5GetU16(&pLeaf->p[2])) ){ pIter->iLeafOffset = iOff; bNewTerm = 1; } if( iOff>=pLeaf->n ){ p->rc = FTS5_CORRUPT; return; } } } /* Check if the iterator is now at EOF. If so, return early. */ if( pIter->pLeaf ){ if( bNewTerm ){ if( pIter->flags & FTS5_SEGITER_ONETERM ){ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; }else{ fts5SegIterLoadTerm(p, pIter, nKeep); fts5SegIterLoadNPos(p, pIter); if( pbNewTerm ) *pbNewTerm = 1; } }else{ fts5SegIterLoadNPos(p, pIter); } } } } } #define SWAPVAL(T, a, b) { T tmp; tmp=a; a=b; b=tmp; } /* ** Iterator pIter currently points to the first rowid in a doclist. This ** function sets the iterator up so that iterates in reverse order through ** the doclist. */ static void fts5SegIterReverse(Fts5Index *p, Fts5SegIter *pIter){ Fts5DlidxIter *pDlidx = pIter->pDlidx; Fts5Data *pLast = 0; int pgnoLast = 0; if( pDlidx ){ int iSegid = pIter->pSeg->iSegid; pgnoLast = fts5DlidxIterPgno(pDlidx); pLast = fts5DataRead(p, FTS5_SEGMENT_ROWID(iSegid, 0, pgnoLast)); }else{ int iOff; /* Byte offset within pLeaf */ Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ /* Currently, Fts5SegIter.iLeafOffset (and iOff) points to the first ** byte of position-list content for the current rowid. Back it up ** so that it points to the start of the position-list size field. */ pIter->iLeafOffset -= sqlite3Fts5GetVarintLen(pIter->nPos*2+pIter->bDel); iOff = pIter->iLeafOffset; assert( iOff>=4 ); /* Search for a new term within the current leaf. If one can be found, ** then this page contains the largest rowid for the current term. */ while( iOffn ){ int nPos; i64 iDelta; int bDummy; /* Read the position-list size field */ iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); iOff += nPos; if( iOff>=pLeaf->n ) break; /* Rowid delta. Or, if 0x00, the end of doclist marker. */ nPos = fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) break; iOff += nPos; } /* If this condition is true then the largest rowid for the current ** term may not be stored on the current page. So search forward to ** see where said rowid really is. */ if( iOff>=pLeaf->n ){ int pgno; Fts5StructureSegment *pSeg = pIter->pSeg; /* The last rowid in the doclist may not be on the current page. Search ** forward to find the page containing the last rowid. */ for(pgno=pIter->iLeafPgno+1; !p->rc && pgno<=pSeg->pgnoLast; pgno++){ i64 iAbs = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, pgno); Fts5Data *pNew = fts5DataRead(p, iAbs); if( pNew ){ int iRowid, iTerm; fts5LeafHeader(pNew, &iRowid, &iTerm); if( iRowid ){ SWAPVAL(Fts5Data*, pNew, pLast); pgnoLast = pgno; } fts5DataRelease(pNew); if( iTerm ) break; } } } } /* If pLast is NULL at this point, then the last rowid for this doclist ** lies on the page currently indicated by the iterator. In this case ** pIter->iLeafOffset is already set to point to the position-list size ** field associated with the first relevant rowid on the page. ** ** Or, if pLast is non-NULL, then it is the page that contains the last ** rowid. In this case configure the iterator so that it points to the ** first rowid on this page. */ if( pLast ){ int dummy; int iOff; fts5DataRelease(pIter->pLeaf); pIter->pLeaf = pLast; pIter->iLeafPgno = pgnoLast; fts5LeafHeader(pLast, &iOff, &dummy); iOff += fts5GetVarint(&pLast->p[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; } fts5SegIterReverseInitPage(p, pIter); } /* ** Iterator pIter currently points to the first rowid of a doclist. ** There is a doclist-index associated with the final term on the current ** page. If the current term is the last term on the page, load the ** doclist-index from disk and initialize an iterator at (pIter->pDlidx). */ static void fts5SegIterLoadDlidx(Fts5Index *p, Fts5SegIter *pIter){ int iSeg = pIter->pSeg->iSegid; int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); Fts5Data *pLeaf = pIter->pLeaf; /* Current leaf data */ assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx==0 ); /* Check if the current doclist ends on this page. If it does, return ** early without loading the doclist-index (as it belongs to a different ** term. */ if( pIter->iTermLeafPgno==pIter->iLeafPgno ){ int iOff = pIter->iLeafOffset + pIter->nPos; while( iOffn ){ int bDummy; int nPos; i64 iDelta; /* iOff is currently the offset of the start of position list data */ iOff += fts5GetVarint(&pLeaf->p[iOff], (u64*)&iDelta); if( iDelta==0 ) return; assert_nc( iOffn ); iOff += fts5GetPoslistSize(&pLeaf->p[iOff], &nPos, &bDummy); iOff += nPos; } } pIter->pDlidx = fts5DlidxIterInit(p, bRev, iSeg, pIter->iTermLeafPgno); } /* ** Initialize the object pIter to point to term pTerm/nTerm within segment ** pSeg. If there is no such term in the index, the iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterSeekInit( Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5StructureSegment *pSeg, /* Description of segment */ Fts5SegIter *pIter /* Object to populate */ ){ int iPg = 1; int h; int bGe = (flags & FTS5INDEX_QUERY_SCAN); int bDlidx = 0; /* True if there is a doclist-index */ Fts5Data *pLeaf; assert( bGe==0 || (flags & FTS5INDEX_QUERY_DESC)==0 ); assert( pTerm && nTerm ); memset(pIter, 0, sizeof(*pIter)); pIter->pSeg = pSeg; /* This block sets stack variable iPg to the leaf page number that may ** contain term (pTerm/nTerm), if it is present in the segment. */ for(h=pSeg->nHeight-1; h>0; h--){ Fts5NodeIter node; /* For iterating through internal nodes */ i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, h, iPg); Fts5Data *pNode = fts5DataRead(p, iRowid); if( pNode==0 ) break; fts5NodeIterInit(pNode->p, pNode->n, &node); assert( node.term.n==0 ); iPg = node.iChild; bDlidx = node.bDlidx; for(fts5NodeIterNext(&p->rc, &node); node.aData && fts5BufferCompareBlob(&node.term, pTerm, nTerm)<=0; fts5NodeIterNext(&p->rc, &node) ){ iPg = node.iChild; bDlidx = node.bDlidx; } fts5NodeIterFree(&node); fts5DataRelease(pNode); } if( iPgpgnoFirst ){ iPg = pSeg->pgnoFirst; bDlidx = 0; } pIter->iLeafPgno = iPg - 1; fts5SegIterNextPage(p, pIter); if( (pLeaf = pIter->pLeaf) ){ int res; pIter->iLeafOffset = fts5GetU16(&pLeaf->p[2]); if( pIter->iLeafOffset<4 || pIter->iLeafOffset>=pLeaf->n ){ p->rc = FTS5_CORRUPT; }else{ fts5SegIterLoadTerm(p, pIter, 0); fts5SegIterLoadNPos(p, pIter); do { res = fts5BufferCompareBlob(&pIter->term, pTerm, nTerm); if( res>=0 ) break; fts5SegIterNext(p, pIter, 0); }while( pIter->pLeaf && p->rc==SQLITE_OK ); if( bGe==0 && res ){ /* Set iterator to point to EOF */ fts5DataRelease(pIter->pLeaf); pIter->pLeaf = 0; } } } if( p->rc==SQLITE_OK && bGe==0 ){ pIter->flags |= FTS5_SEGITER_ONETERM; if( pIter->pLeaf ){ if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; } if( bDlidx ){ fts5SegIterLoadDlidx(p, pIter); } if( flags & FTS5INDEX_QUERY_DESC ){ fts5SegIterReverse(p, pIter); } } } } /* ** Initialize the object pIter to point to term pTerm/nTerm within the ** in-memory hash table. If there is no such term in the hash-table, the ** iterator is set to EOF. ** ** If an error occurs, Fts5Index.rc is set to an appropriate error code. If ** an error has already occurred when this function is called, it is a no-op. */ static void fts5SegIterHashInit( Fts5Index *p, /* FTS5 backend */ const u8 *pTerm, int nTerm, /* Term to seek to */ int flags, /* Mask of FTS5INDEX_XXX flags */ Fts5SegIter *pIter /* Object to populate */ ){ const u8 *pList = 0; int nList = 0; const u8 *z = 0; int n = 0; assert( p->pHash ); assert( p->rc==SQLITE_OK ); if( pTerm==0 || (flags & FTS5INDEX_QUERY_SCAN) ){ p->rc = sqlite3Fts5HashScanInit(p->pHash, (const char*)pTerm, nTerm); sqlite3Fts5HashScanEntry(p->pHash, (const char**)&z, &pList, &nList); n = (z ? strlen((const char*)z) : 0); }else{ pIter->flags |= FTS5_SEGITER_ONETERM; sqlite3Fts5HashQuery(p->pHash, (const char*)pTerm, nTerm, &pList, &nList); z = pTerm; n = nTerm; } if( pList ){ Fts5Data *pLeaf; sqlite3Fts5BufferSet(&p->rc, &pIter->term, n, z); pLeaf = fts5IdxMalloc(p, sizeof(Fts5Data)); if( pLeaf==0 ) return; pLeaf->p = (u8*)pList; pLeaf->n = nList; pIter->pLeaf = pLeaf; pIter->iLeafOffset = fts5GetVarint(pLeaf->p, (u64*)&pIter->iRowid); if( flags & FTS5INDEX_QUERY_DESC ){ pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); } } } /* ** Zero the iterator passed as the only argument. */ static void fts5SegIterClear(Fts5SegIter *pIter){ fts5BufferFree(&pIter->term); fts5DataRelease(pIter->pLeaf); fts5DataRelease(pIter->pNextLeaf); fts5DlidxIterFree(pIter->pDlidx); sqlite3_free(pIter->aRowidOffset); memset(pIter, 0, sizeof(Fts5SegIter)); } #ifdef SQLITE_DEBUG /* ** This function is used as part of the big assert() procedure implemented by ** fts5AssertMultiIterSetup(). It ensures that the result currently stored ** in *pRes is the correct result of comparing the current positions of the ** two iterators. */ static void fts5AssertComparisonResult( Fts5MultiSegIter *pIter, Fts5SegIter *p1, Fts5SegIter *p2, Fts5CResult *pRes ){ int i1 = p1 - pIter->aSeg; int i2 = p2 - pIter->aSeg; if( p1->pLeaf || p2->pLeaf ){ if( p1->pLeaf==0 ){ assert( pRes->iFirst==i2 ); }else if( p2->pLeaf==0 ){ assert( pRes->iFirst==i1 ); }else{ int nMin = MIN(p1->term.n, p2->term.n); int res = memcmp(p1->term.p, p2->term.p, nMin); if( res==0 ) res = p1->term.n - p2->term.n; if( res==0 ){ assert( pRes->bTermEq==1 ); assert( p1->iRowid!=p2->iRowid ); res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : 1; }else{ assert( pRes->bTermEq==0 ); } if( res<0 ){ assert( pRes->iFirst==i1 ); }else{ assert( pRes->iFirst==i2 ); } } } } /* ** This function is a no-op unless SQLITE_DEBUG is defined when this module ** is compiled. In that case, this function is essentially an assert() ** statement used to verify that the contents of the pIter->aFirst[] array ** are correct. */ static void fts5AssertMultiIterSetup(Fts5Index *p, Fts5MultiSegIter *pIter){ if( p->rc==SQLITE_OK ){ int i; for(i=0; inSeg; i+=2){ Fts5SegIter *p1 = &pIter->aSeg[i]; Fts5SegIter *p2 = &pIter->aSeg[i+1]; Fts5CResult *pRes = &pIter->aFirst[(pIter->nSeg + i) / 2]; fts5AssertComparisonResult(pIter, p1, p2, pRes); } for(i=1; i<(pIter->nSeg / 2); i+=2){ Fts5CResult *pRes = &pIter->aFirst[i]; Fts5SegIter *p1 = &pIter->aSeg[ pIter->aFirst[i*2].iFirst ]; Fts5SegIter *p2 = &pIter->aSeg[ pIter->aFirst[i*2+1].iFirst ]; fts5AssertComparisonResult(pIter, p1, p2, pRes); } } } #else # define fts5AssertMultiIterSetup(x,y) #endif /* ** Do the comparison necessary to populate pIter->aFirst[iOut]. ** ** If the returned value is non-zero, then it is the index of an entry ** in the pIter->aSeg[] array that is (a) not at EOF, and (b) pointing ** to a key that is a duplicate of another, higher priority, ** segment-iterator in the pSeg->aSeg[] array. */ static int fts5MultiIterDoCompare(Fts5MultiSegIter *pIter, int iOut){ int i1; /* Index of left-hand Fts5SegIter */ int i2; /* Index of right-hand Fts5SegIter */ int iRes; Fts5SegIter *p1; /* Left-hand Fts5SegIter */ Fts5SegIter *p2; /* Right-hand Fts5SegIter */ Fts5CResult *pRes = &pIter->aFirst[iOut]; assert( iOutnSeg && iOut>0 ); assert( pIter->bRev==0 || pIter->bRev==1 ); if( iOut>=(pIter->nSeg/2) ){ i1 = (iOut - pIter->nSeg/2) * 2; i2 = i1 + 1; }else{ i1 = pIter->aFirst[iOut*2].iFirst; i2 = pIter->aFirst[iOut*2+1].iFirst; } p1 = &pIter->aSeg[i1]; p2 = &pIter->aSeg[i2]; pRes->bTermEq = 0; if( p1->pLeaf==0 ){ /* If p1 is at EOF */ iRes = i2; }else if( p2->pLeaf==0 ){ /* If p2 is at EOF */ iRes = i1; }else{ int res = fts5BufferCompare(&p1->term, &p2->term); if( res==0 ){ assert( i2>i1 ); assert( i2!=0 ); pRes->bTermEq = 1; if( p1->iRowid==p2->iRowid ){ p1->bDel = p2->bDel; return i2; } res = ((p1->iRowid > p2->iRowid)==pIter->bRev) ? -1 : +1; } assert( res!=0 ); if( res<0 ){ iRes = i1; }else{ iRes = i2; } } pRes->iFirst = iRes; return 0; } /* ** Move the seg-iter so that it points to the first rowid on page iLeafPgno. ** It is an error if leaf iLeafPgno does not exist or contains no rowids. */ static void fts5SegIterGotoPage( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter, /* Iterator to advance */ int iLeafPgno ){ assert( iLeafPgno>pIter->iLeafPgno ); if( iLeafPgno>pIter->pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; }else{ fts5DataRelease(pIter->pNextLeaf); pIter->pNextLeaf = 0; pIter->iLeafPgno = iLeafPgno-1; fts5SegIterNextPage(p, pIter); assert( p->rc!=SQLITE_OK || pIter->iLeafPgno==iLeafPgno ); if( p->rc==SQLITE_OK ){ int iOff; u8 *a = pIter->pLeaf->p; int n = pIter->pLeaf->n; iOff = fts5GetU16(&a[0]); if( iOff<4 || iOff>=n ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint(&a[iOff], (u64*)&pIter->iRowid); pIter->iLeafOffset = iOff; fts5SegIterLoadNPos(p, pIter); } } } } /* ** Advance the iterator passed as the second argument until it is at or ** past rowid iFrom. Regardless of the value of iFrom, the iterator is ** always advanced at least once. */ static void fts5SegIterNextFrom( Fts5Index *p, /* FTS5 backend object */ Fts5SegIter *pIter, /* Iterator to advance */ i64 iMatch /* Advance iterator at least this far */ ){ int bRev = (pIter->flags & FTS5_SEGITER_REVERSE); Fts5DlidxIter *pDlidx = pIter->pDlidx; int iLeafPgno = pIter->iLeafPgno; int bMove = 1; assert( pIter->flags & FTS5_SEGITER_ONETERM ); assert( pIter->pDlidx ); assert( pIter->pLeaf ); if( bRev==0 ){ while( !fts5DlidxIterEof(p, pDlidx) && iMatch>fts5DlidxIterRowid(pDlidx) ){ iLeafPgno = fts5DlidxIterPgno(pDlidx); fts5DlidxIterNext(p, pDlidx); } assert_nc( iLeafPgno>=pIter->iLeafPgno || p->rc ); if( iLeafPgno>pIter->iLeafPgno ){ fts5SegIterGotoPage(p, pIter, iLeafPgno); bMove = 0; } }else{ assert( pIter->pNextLeaf==0 ); assert( iMatchiRowid ); while( !fts5DlidxIterEof(p, pDlidx) && iMatchiLeafPgno ); if( iLeafPgnoiLeafPgno ){ pIter->iLeafPgno = iLeafPgno+1; fts5SegIterReverseNewPage(p, pIter); bMove = 0; } } while( p->rc==SQLITE_OK ){ if( bMove ) fts5SegIterNext(p, pIter, 0); if( pIter->pLeaf==0 ) break; if( bRev==0 && pIter->iRowid>=iMatch ) break; if( bRev!=0 && pIter->iRowid<=iMatch ) break; bMove = 1; } } /* ** Free the iterator object passed as the second argument. */ static void fts5MultiIterFree(Fts5Index *p, Fts5MultiSegIter *pIter){ if( pIter ){ int i; for(i=0; inSeg; i++){ fts5SegIterClear(&pIter->aSeg[i]); } sqlite3_free(pIter); } } static void fts5MultiIterAdvanced( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ int iChanged, /* Index of sub-iterator just advanced */ int iMinset /* Minimum entry in aFirst[] to set */ ){ int i; for(i=(pIter->nSeg+iChanged)/2; i>=iMinset && p->rc==SQLITE_OK; i=i/2){ int iEq; if( (iEq = fts5MultiIterDoCompare(pIter, i)) ){ fts5SegIterNext(p, &pIter->aSeg[iEq], 0); i = pIter->nSeg + iEq; } } } static int fts5MultiIterAdvanceRowid( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5MultiSegIter *pIter, /* Iterator to update aFirst[] array for */ int iChanged /* Index of sub-iterator just advanced */ ){ int i; Fts5SegIter *pNew = &pIter->aSeg[iChanged]; Fts5SegIter *pOther = &pIter->aSeg[iChanged ^ 0x0001]; for(i=(pIter->nSeg+iChanged)/2; p->rc==SQLITE_OK; i=i/2){ Fts5CResult *pRes = &pIter->aFirst[i]; assert( pNew->pLeaf ); assert( pRes->bTermEq==0 || pOther->pLeaf ); if( pRes->bTermEq ){ if( pNew->iRowid==pOther->iRowid ){ return 1; }else if( (pOther->iRowid>pNew->iRowid)==pIter->bRev ){ pNew = pOther; } } pRes->iFirst = (pNew - pIter->aSeg); if( i==1 ) break; pOther = &pIter->aSeg[ pIter->aFirst[i ^ 0x0001].iFirst ]; } return 0; } /* ** Move the iterator to the next entry. ** ** If an error occurs, an error code is left in Fts5Index.rc. It is not ** considered an error if the iterator reaches EOF, or if it is already at ** EOF when this function is called. */ static void fts5MultiIterNext( Fts5Index *p, Fts5MultiSegIter *pIter, int bFrom, /* True if argument iFrom is valid */ i64 iFrom /* Advance at least as far as this */ ){ if( p->rc==SQLITE_OK ){ int bUseFrom = bFrom; do { int iFirst = pIter->aFirst[1].iFirst; int bNewTerm = 0; Fts5SegIter *pSeg = &pIter->aSeg[iFirst]; assert( p->rc==SQLITE_OK ); if( bUseFrom && pSeg->pDlidx ){ fts5SegIterNextFrom(p, pSeg, iFrom); }else{ fts5SegIterNext(p, pSeg, &bNewTerm); } if( pSeg->pLeaf==0 || bNewTerm || fts5MultiIterAdvanceRowid(p, pIter, iFirst) ){ fts5MultiIterAdvanced(p, pIter, iFirst, 1); } fts5AssertMultiIterSetup(p, pIter); bUseFrom = 0; }while( pIter->bSkipEmpty && fts5MultiIterIsEmpty(p, pIter) ); } } static Fts5MultiSegIter *fts5MultiIterAlloc( Fts5Index *p, /* FTS5 backend to iterate within */ int nSeg ){ Fts5MultiSegIter *pNew; int nSlot; /* Power of two >= nSeg */ for(nSlot=2; nSlotaSeg[] */ sizeof(Fts5CResult) * nSlot /* pNew->aFirst[] */ ); if( pNew ){ pNew->nSeg = nSlot; pNew->aSeg = (Fts5SegIter*)&pNew[1]; pNew->aFirst = (Fts5CResult*)&pNew->aSeg[nSlot]; } return pNew; } /* ** Allocate a new Fts5MultiSegIter object. ** ** The new object will be used to iterate through data in structure pStruct. ** If iLevel is -ve, then all data in all segments is merged. Or, if iLevel ** is zero or greater, data from the first nSegment segments on level iLevel ** is merged. ** ** The iterator initially points to the first term/rowid entry in the ** iterated data. */ static void fts5MultiIterNew( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Structure *pStruct, /* Structure of specific index */ int bSkipEmpty, /* True to ignore delete-keys */ int flags, /* FTS5INDEX_QUERY_XXX flags */ const u8 *pTerm, int nTerm, /* Term to seek to (or NULL/0) */ int iLevel, /* Level to iterate (-1 for all) */ int nSegment, /* Number of segments to merge (iLevel>=0) */ Fts5MultiSegIter **ppOut /* New object */ ){ int nSeg = 0; /* Number of segment-iters in use */ int iIter = 0; /* */ int iSeg; /* Used to iterate through segments */ Fts5StructureLevel *pLvl; Fts5MultiSegIter *pNew; assert( (pTerm==0 && nTerm==0) || iLevel<0 ); /* Allocate space for the new multi-seg-iterator. */ if( p->rc==SQLITE_OK ){ if( iLevel<0 ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; nSeg += (p->pHash ? 1 : 0); }else{ nSeg = MIN(pStruct->aLevel[iLevel].nSeg, nSegment); } } *ppOut = pNew = fts5MultiIterAlloc(p, nSeg); if( pNew==0 ) return; pNew->bRev = (0!=(flags & FTS5INDEX_QUERY_DESC)); pNew->bSkipEmpty = bSkipEmpty; /* Initialize each of the component segment iterators. */ if( iLevel<0 ){ Fts5StructureLevel *pEnd = &pStruct->aLevel[pStruct->nLevel]; if( p->pHash ){ /* Add a segment iterator for the current contents of the hash table. */ Fts5SegIter *pIter = &pNew->aSeg[iIter++]; fts5SegIterHashInit(p, pTerm, nTerm, flags, pIter); } for(pLvl=&pStruct->aLevel[0]; pLvlnSeg-1; iSeg>=0; iSeg--){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; Fts5SegIter *pIter = &pNew->aSeg[iIter++]; if( pTerm==0 ){ fts5SegIterInit(p, pSeg, pIter); }else{ fts5SegIterSeekInit(p, pTerm, nTerm, flags, pSeg, pIter); } } } }else{ pLvl = &pStruct->aLevel[iLevel]; for(iSeg=nSeg-1; iSeg>=0; iSeg--){ fts5SegIterInit(p, &pLvl->aSeg[iSeg], &pNew->aSeg[iIter++]); } } assert( iIter==nSeg ); /* If the above was successful, each component iterators now points ** to the first entry in its segment. In this case initialize the ** aFirst[] array. Or, if an error has occurred, free the iterator ** object and set the output variable to NULL. */ if( p->rc==SQLITE_OK ){ for(iIter=pNew->nSeg-1; iIter>0; iIter--){ int iEq; if( (iEq = fts5MultiIterDoCompare(pNew, iIter)) ){ fts5SegIterNext(p, &pNew->aSeg[iEq], 0); fts5MultiIterAdvanced(p, pNew, iEq, iIter); } } fts5AssertMultiIterSetup(p, pNew); if( pNew->bSkipEmpty && fts5MultiIterIsEmpty(p, pNew) ){ fts5MultiIterNext(p, pNew, 0, 0); } }else{ fts5MultiIterFree(p, pNew); *ppOut = 0; } } /* ** Create an Fts5MultiSegIter that iterates through the doclist provided ** as the second argument. */ static void fts5MultiIterNew2( Fts5Index *p, /* FTS5 backend to iterate within */ Fts5Data *pData, /* Doclist to iterate through */ int bDesc, /* True for descending rowid order */ Fts5MultiSegIter **ppOut /* New object */ ){ Fts5MultiSegIter *pNew; pNew = fts5MultiIterAlloc(p, 2); if( pNew ){ Fts5SegIter *pIter = &pNew->aSeg[1]; pIter->flags = FTS5_SEGITER_ONETERM; if( pData->n>0 ){ pIter->pLeaf = pData; pIter->iLeafOffset = fts5GetVarint(pData->p, (u64*)&pIter->iRowid); pNew->aFirst[1].iFirst = 1; if( bDesc ){ pNew->bRev = 1; pIter->flags |= FTS5_SEGITER_REVERSE; fts5SegIterReverseInitPage(p, pIter); }else{ fts5SegIterLoadNPos(p, pIter); } pData = 0; } *ppOut = pNew; } fts5DataRelease(pData); } /* ** Return true if the iterator is at EOF or if an error has occurred. ** False otherwise. */ static int fts5MultiIterEof(Fts5Index *p, Fts5MultiSegIter *pIter){ return (p->rc || pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf==0); } /* ** Return the rowid of the entry that the iterator currently points ** to. If the iterator points to EOF when this function is called the ** results are undefined. */ static i64 fts5MultiIterRowid(Fts5MultiSegIter *pIter){ assert( pIter->aSeg[ pIter->aFirst[1].iFirst ].pLeaf ); return pIter->aSeg[ pIter->aFirst[1].iFirst ].iRowid; } /* ** Move the iterator to the next entry at or following iMatch. */ static void fts5MultiIterNextFrom( Fts5Index *p, Fts5MultiSegIter *pIter, i64 iMatch ){ while( 1 ){ i64 iRowid; fts5MultiIterNext(p, pIter, 1, iMatch); if( fts5MultiIterEof(p, pIter) ) break; iRowid = fts5MultiIterRowid(pIter); if( pIter->bRev==0 && iRowid>=iMatch ) break; if( pIter->bRev!=0 && iRowid<=iMatch ) break; } } /* ** Return a pointer to a buffer containing the term associated with the ** entry that the iterator currently points to. */ static const u8 *fts5MultiIterTerm(Fts5MultiSegIter *pIter, int *pn){ Fts5SegIter *p = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; *pn = p->term.n; return p->term.p; } static void fts5ChunkIterate( Fts5Index *p, /* Index object */ Fts5SegIter *pSeg, /* Poslist of this iterator */ void *pCtx, /* Context pointer for xChunk callback */ void (*xChunk)(Fts5Index*, void*, const u8*, int) ){ int nRem = pSeg->nPos; /* Number of bytes still to come */ Fts5Data *pData = 0; u8 *pChunk = &pSeg->pLeaf->p[pSeg->iLeafOffset]; int nChunk = MIN(nRem, pSeg->pLeaf->n - pSeg->iLeafOffset); int pgno = pSeg->iLeafPgno; int pgnoSave = 0; if( (pSeg->flags & FTS5_SEGITER_REVERSE)==0 ){ pgnoSave = pgno+1; } while( 1 ){ xChunk(p, pCtx, pChunk, nChunk); nRem -= nChunk; fts5DataRelease(pData); if( nRem<=0 ){ break; }else{ pgno++; pData = fts5DataRead(p, FTS5_SEGMENT_ROWID(pSeg->pSeg->iSegid, 0, pgno)); if( pData==0 ) break; pChunk = &pData->p[4]; nChunk = MIN(nRem, pData->n - 4); if( pgno==pgnoSave ){ assert( pSeg->pNextLeaf==0 ); pSeg->pNextLeaf = pData; pData = 0; } } } } /* ** Allocate a new segment-id for the structure pStruct. The new segment ** id must be between 1 and 65335 inclusive, and must not be used by ** any currently existing segment. If a free segment id cannot be found, ** SQLITE_FULL is returned. ** ** If an error has already occurred, this function is a no-op. 0 is ** returned in this case. */ static int fts5AllocateSegid(Fts5Index *p, Fts5Structure *pStruct){ u32 iSegid = 0; if( p->rc==SQLITE_OK ){ if( pStruct->nSegment>=FTS5_MAX_SEGMENT ){ p->rc = SQLITE_FULL; }else{ while( iSegid==0 ){ int iLvl, iSeg; sqlite3_randomness(sizeof(u32), (void*)&iSegid); iSegid = (iSegid % ((1 << FTS5_DATA_ID_B) - 2)) + 1; assert( iSegid>0 && iSegid<=65535 ); for(iLvl=0; iLvlnLevel; iLvl++){ for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ if( iSegid==pStruct->aLevel[iLvl].aSeg[iSeg].iSegid ){ iSegid = 0; } } } } } } return (int)iSegid; } /* ** Discard all data currently cached in the hash-tables. */ static void fts5IndexDiscardData(Fts5Index *p){ assert( p->pHash || p->nPendingData==0 ); if( p->pHash ){ sqlite3Fts5HashClear(p->pHash); p->nPendingData = 0; } } /* ** Return the size of the prefix, in bytes, that buffer (nNew/pNew) shares ** with buffer (nOld/pOld). */ static int fts5PrefixCompress( int nOld, const u8 *pOld, int nNew, const u8 *pNew ){ int i; assert( fts5BlobCompare(pOld, nOld, pNew, nNew)<0 ); for(i=0; inDlidx>0 && pWriter->aDlidx[0].buf.n>0) ); for(i=0; inDlidx; i++){ Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; if( pDlidx->buf.n==0 ) break; if( bFlush ){ assert( pDlidx->pgno!=0 ); fts5DataWrite(p, FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), pDlidx->buf.p, pDlidx->buf.n ); } sqlite3Fts5BufferZero(&pDlidx->buf); pDlidx->bPrevValid = 0; } } /* ** Grow the pWriter->aDlidx[] array to at least nLvl elements in size. ** Any new array elements are zeroed before returning. */ static int fts5WriteDlidxGrow( Fts5Index *p, Fts5SegWriter *pWriter, int nLvl ){ if( p->rc==SQLITE_OK && nLvl>=pWriter->nDlidx ){ Fts5DlidxWriter *aDlidx = (Fts5DlidxWriter*)sqlite3_realloc( pWriter->aDlidx, sizeof(Fts5DlidxWriter) * nLvl ); if( aDlidx==0 ){ p->rc = SQLITE_NOMEM; }else{ int nByte = sizeof(Fts5DlidxWriter) * (nLvl - pWriter->nDlidx); memset(&aDlidx[pWriter->nDlidx], 0, nByte); pWriter->aDlidx = aDlidx; pWriter->nDlidx = nLvl; } } return p->rc; } /* ** If an "nEmpty" record must be written to the b-tree before the next ** term, write it now. */ static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){ if( pWriter->nEmpty ){ int bFlag = 0; Fts5PageWriter *pPg; pPg = &pWriter->aWriter[1]; /* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written ** to the database, also write the doclist-index to disk. */ if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ bFlag = 1; } fts5WriteDlidxClear(p, pWriter, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag); fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty); pWriter->nEmpty = 0; }else{ fts5WriteDlidxClear(p, pWriter, 0); } assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 ); assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 ); } static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *aNew; Fts5PageWriter *pNew; int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1); aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew); if( aNew==0 ){ p->rc = SQLITE_NOMEM; return; } pNew = &aNew[pWriter->nWriter]; memset(pNew, 0, sizeof(Fts5PageWriter)); pNew->pgno = 1; fts5BufferAppendVarint(&p->rc, &pNew->buf, 1); pWriter->nWriter++; pWriter->aWriter = aNew; } } /* ** This is called once for each leaf page except the first that contains ** at least one term. Argument (nTerm/pTerm) is the split-key - a term that ** is larger than all terms written to earlier leaves, and equal to or ** smaller than the first term on the new leaf. ** ** If an error occurs, an error code is left in Fts5Index.rc. If an error ** has already occurred when this function is called, it is a no-op. */ static void fts5WriteBtreeTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter, /* Writer object */ int nTerm, const u8 *pTerm /* First term on new page */ ){ int iHeight; for(iHeight=1; 1; iHeight++){ Fts5PageWriter *pPage; if( iHeight>=pWriter->nWriter ){ fts5WriteBtreeGrow(p, pWriter); if( p->rc ) return; } pPage = &pWriter->aWriter[iHeight]; fts5WriteBtreeNEmpty(p, pWriter); if( pPage->buf.n>=p->pConfig->pgsz ){ /* pPage will be written to disk. The term will be written into the ** parent of pPage. */ i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, iHeight, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); fts5BufferZero(&pPage->buf); fts5BufferZero(&pPage->term); fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno); pPage->pgno++; }else{ int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2); fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre); fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre); fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); break; } } } /* ** This function is called when flushing a leaf page that contains no ** terms at all to disk. */ static void fts5WriteBtreeNoTerm( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter /* Writer object */ ){ /* If there were no rowids on the leaf page either and the doclist-index ** has already been started, append an 0x00 byte to it. */ if( pWriter->bFirstRowidInPage && pWriter->aDlidx[0].buf.n>0 ){ Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[0]; assert( pDlidx->bPrevValid ); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, 0); } /* Increment the "number of sequential leaves without a term" counter. */ pWriter->nEmpty++; } static i64 fts5DlidxExtractFirstRowid(Fts5Buffer *pBuf){ i64 iRowid; int iOff; iOff = 1 + fts5GetVarint(&pBuf->p[1], (u64*)&iRowid); fts5GetVarint(&pBuf->p[iOff], (u64*)&iRowid); return iRowid; } /* ** Rowid iRowid has just been appended to the current leaf page. It is the ** first on the page. This function appends an appropriate entry to the current ** doclist-index. */ static void fts5WriteDlidxAppend( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid ){ int i; int bDone = 0; for(i=0; p->rc==SQLITE_OK && bDone==0; i++){ i64 iVal; Fts5DlidxWriter *pDlidx = &pWriter->aDlidx[i]; if( pDlidx->buf.n>=p->pConfig->pgsz ){ /* The current doclist-index page is full. Write it to disk and push ** a copy of iRowid (which will become the first rowid on the next ** doclist-index leaf page) up into the next level of the b-tree ** hierarchy. If the node being flushed is currently the root node, ** also push its first rowid upwards. */ pDlidx->buf.p[0] = 0x01; /* Not the root node */ fts5DataWrite(p, FTS5_DLIDX_ROWID(pWriter->iSegid, i, pDlidx->pgno), pDlidx->buf.p, pDlidx->buf.n ); fts5WriteDlidxGrow(p, pWriter, i+2); pDlidx = &pWriter->aDlidx[i]; if( p->rc==SQLITE_OK && pDlidx[1].buf.n==0 ){ i64 iFirst = fts5DlidxExtractFirstRowid(&pDlidx->buf); /* This was the root node. Push its first rowid up to the new root. */ pDlidx[1].pgno = pDlidx->pgno; sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, 0); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, pDlidx->pgno); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx[1].buf, iFirst); pDlidx[1].bPrevValid = 1; pDlidx[1].iPrev = iFirst; } sqlite3Fts5BufferZero(&pDlidx->buf); pDlidx->bPrevValid = 0; pDlidx->pgno++; }else{ bDone = 1; } if( pDlidx->bPrevValid ){ iVal = iRowid - pDlidx->iPrev; }else{ i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno); assert( pDlidx->buf.n==0 ); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone); sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno); iVal = iRowid; } sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iVal); pDlidx->bPrevValid = 1; pDlidx->iPrev = iRowid; } } static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; Fts5PageWriter *pPage = &pWriter->aWriter[0]; i64 iRowid; if( pWriter->bFirstTermInPage ){ /* No term was written to this page. */ assert( 0==fts5GetU16(&pPage->buf.p[2]) ); fts5WriteBtreeNoTerm(p, pWriter); } /* Write the current page to the db. */ iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, 0, pPage->pgno); fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n); /* Initialize the next page. */ fts5BufferZero(&pPage->buf); fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); pPage->pgno++; /* Increase the leaves written counter */ pWriter->nLeafWritten++; /* The new leaf holds no terms or rowids */ pWriter->bFirstTermInPage = 1; pWriter->bFirstRowidInPage = 1; } /* ** Append term pTerm/nTerm to the segment being written by the writer passed ** as the second argument. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ static void fts5WriteAppendTerm( Fts5Index *p, Fts5SegWriter *pWriter, int nTerm, const u8 *pTerm ){ int nPrefix; /* Bytes of prefix compression for term */ Fts5PageWriter *pPage = &pWriter->aWriter[0]; assert( pPage->buf.n==0 || pPage->buf.n>4 ); if( pPage->buf.n==0 ){ /* Zero the first term and first docid fields */ static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 }; fts5BufferAppendBlob(&p->rc, &pPage->buf, 4, zero); assert( pWriter->bFirstTermInPage ); } if( p->rc ) return; if( pWriter->bFirstTermInPage ){ /* Update the "first term" field of the page header. */ assert( pPage->buf.p[2]==0 && pPage->buf.p[3]==0 ); fts5PutU16(&pPage->buf.p[2], pPage->buf.n); nPrefix = 0; if( pPage->pgno!=1 ){ /* This is the first term on a leaf that is not the leftmost leaf in ** the segment b-tree. In this case it is necessary to add a term to ** the b-tree hierarchy that is (a) larger than the largest term ** already written to the segment and (b) smaller than or equal to ** this term. In other words, a prefix of (pTerm/nTerm) that is one ** byte longer than the longest prefix (pTerm/nTerm) shares with the ** previous term. ** ** Usually, the previous term is available in pPage->term. The exception ** is if this is the first term written in an incremental-merge step. ** In this case the previous term is not available, so just write a ** copy of (pTerm/nTerm) into the parent node. This is slightly ** inefficient, but still correct. */ int n = nTerm; if( pPage->term.n ){ n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); } fts5WriteBtreeTerm(p, pWriter, n, pTerm); pPage = &pWriter->aWriter[0]; } }else{ nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm); fts5BufferAppendVarint(&p->rc, &pPage->buf, nPrefix); } /* Append the number of bytes of new data, then the term data itself ** to the page. */ fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm - nPrefix); fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm - nPrefix, &pTerm[nPrefix]); /* Update the Fts5PageWriter.term field. */ fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm); pWriter->bFirstTermInPage = 0; pWriter->bFirstRowidInPage = 0; pWriter->bFirstRowidInDoclist = 1; assert( p->rc || (pWriter->nDlidx>0 && pWriter->aDlidx[0].buf.n==0) ); pWriter->aDlidx[0].pgno = pPage->pgno; /* If the current leaf page is full, flush it to disk. */ if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } /* ** Append a docid and position-list size field to the writers output. */ static void fts5WriteAppendRowid( Fts5Index *p, Fts5SegWriter *pWriter, i64 iRowid, int nPos ){ if( p->rc==SQLITE_OK ){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; /* If this is to be the first docid written to the page, set the ** docid-pointer in the page-header. Also append a value to the dlidx ** buffer, in case a doclist-index is required. */ if( pWriter->bFirstRowidInPage ){ fts5PutU16(pPage->buf.p, pPage->buf.n); fts5WriteDlidxAppend(p, pWriter, iRowid); } /* Write the docid. */ if( pWriter->bFirstRowidInDoclist || pWriter->bFirstRowidInPage ){ fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid); }else{ assert( p->rc || iRowid>pWriter->iPrevRowid ); fts5BufferAppendVarint(&p->rc, &pPage->buf, iRowid - pWriter->iPrevRowid); } pWriter->iPrevRowid = iRowid; pWriter->bFirstRowidInDoclist = 0; pWriter->bFirstRowidInPage = 0; fts5BufferAppendVarint(&p->rc, &pPage->buf, nPos); if( pPage->buf.n>=p->pConfig->pgsz ){ fts5WriteFlushLeaf(p, pWriter); } } } static void fts5WriteAppendPoslistData( Fts5Index *p, Fts5SegWriter *pWriter, const u8 *aData, int nData ){ Fts5PageWriter *pPage = &pWriter->aWriter[0]; const u8 *a = aData; int n = nData; assert( p->pConfig->pgsz>0 ); while( p->rc==SQLITE_OK && (pPage->buf.n + n)>=p->pConfig->pgsz ){ int nReq = p->pConfig->pgsz - pPage->buf.n; int nCopy = 0; while( nCopyrc, &pPage->buf, nCopy, a); a += nCopy; n -= nCopy; fts5WriteFlushLeaf(p, pWriter); } if( n>0 ){ fts5BufferAppendBlob(&p->rc, &pPage->buf, n, a); } } static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){ fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0); } /* ** Flush any data cached by the writer object to the database. Free any ** allocations associated with the writer. */ static void fts5WriteFinish( Fts5Index *p, Fts5SegWriter *pWriter, /* Writer object */ int *pnHeight, /* OUT: Height of the b-tree */ int *pnLeaf /* OUT: Number of leaf pages in b-tree */ ){ int i; if( p->rc==SQLITE_OK ){ Fts5PageWriter *pLeaf = &pWriter->aWriter[0]; if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){ *pnLeaf = 0; *pnHeight = 0; }else{ if( pLeaf->buf.n>4 ){ fts5WriteFlushLeaf(p, pWriter); } *pnLeaf = pLeaf->pgno-1; if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){ fts5WriteBtreeGrow(p, pWriter); } if( pWriter->nWriter>1 ){ fts5WriteBtreeNEmpty(p, pWriter); } *pnHeight = pWriter->nWriter; for(i=1; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; fts5DataWrite(p, FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pPg->pgno), pPg->buf.p, pPg->buf.n ); } } } for(i=0; inWriter; i++){ Fts5PageWriter *pPg = &pWriter->aWriter[i]; fts5BufferFree(&pPg->term); fts5BufferFree(&pPg->buf); } sqlite3_free(pWriter->aWriter); for(i=0; inDlidx; i++){ sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf); } sqlite3_free(pWriter->aDlidx); } static void fts5WriteInit( Fts5Index *p, Fts5SegWriter *pWriter, int iSegid ){ memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter)); if( fts5WriteDlidxGrow(p, pWriter, 1) ) return; pWriter->nWriter = 1; pWriter->nDlidx = 1; pWriter->aWriter[0].pgno = 1; pWriter->bFirstTermInPage = 1; } static void fts5WriteInitForAppend( Fts5Index *p, /* FTS5 backend object */ Fts5SegWriter *pWriter, /* Writer to initialize */ Fts5StructureSegment *pSeg /* Segment object to append to */ ){ int nByte = pSeg->nHeight * sizeof(Fts5PageWriter); memset(pWriter, 0, sizeof(Fts5SegWriter)); pWriter->iSegid = pSeg->iSegid; pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte); pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter)); if( p->rc==SQLITE_OK ){ int pgno = 1; int i; pWriter->nDlidx = 1; pWriter->nWriter = pSeg->nHeight; pWriter->aWriter[0].pgno = pSeg->pgnoLast+1; for(i=pSeg->nHeight-1; i>0; i--){ i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno); Fts5PageWriter *pPg = &pWriter->aWriter[i]; pPg->pgno = pgno; fts5DataBuffer(p, &pPg->buf, iRowid); if( p->rc==SQLITE_OK ){ Fts5NodeIter ss; fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss); while( ss.aData ) fts5NodeIterNext(&p->rc, &ss); fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p); pgno = ss.iChild; fts5NodeIterFree(&ss); } } assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast ); pWriter->bFirstTermInPage = 1; assert( pWriter->aWriter[0].term.n==0 ); } } /* ** Iterator pIter was used to iterate through the input segments of on an ** incremental merge operation. This function is called if the incremental ** merge step has finished but the input has not been completely exhausted. */ static void fts5TrimSegments(Fts5Index *p, Fts5MultiSegIter *pIter){ int i; Fts5Buffer buf; memset(&buf, 0, sizeof(Fts5Buffer)); for(i=0; inSeg; i++){ Fts5SegIter *pSeg = &pIter->aSeg[i]; if( pSeg->pSeg==0 ){ /* no-op */ }else if( pSeg->pLeaf==0 ){ /* All keys from this input segment have been transfered to the output. ** Set both the first and last page-numbers to 0 to indicate that the ** segment is now empty. */ pSeg->pSeg->pgnoLast = 0; pSeg->pSeg->pgnoFirst = 0; }else{ int iOff = pSeg->iTermLeafOffset; /* Offset on new first leaf page */ i64 iLeafRowid; Fts5Data *pData; int iId = pSeg->pSeg->iSegid; u8 aHdr[4] = {0x00, 0x00, 0x00, 0x04}; iLeafRowid = FTS5_SEGMENT_ROWID(iId, 0, pSeg->iTermLeafPgno); pData = fts5DataRead(p, iLeafRowid); if( pData ){ fts5BufferZero(&buf); fts5BufferAppendBlob(&p->rc, &buf, sizeof(aHdr), aHdr); fts5BufferAppendVarint(&p->rc, &buf, pSeg->term.n); fts5BufferAppendBlob(&p->rc, &buf, pSeg->term.n, pSeg->term.p); fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]); fts5DataRelease(pData); pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno; fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid); fts5DataWrite(p, iLeafRowid, buf.p, buf.n); } } } fts5BufferFree(&buf); } static void fts5MergeChunkCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ Fts5SegWriter *pWriter = (Fts5SegWriter*)pCtx; fts5WriteAppendPoslistData(p, pWriter, pChunk, nChunk); } /* ** */ static void fts5IndexMergeLevel( Fts5Index *p, /* FTS5 backend object */ Fts5Structure **ppStruct, /* IN/OUT: Stucture of index */ int iLvl, /* Level to read input from */ int *pnRem /* Write up to this many output leaves */ ){ Fts5Structure *pStruct = *ppStruct; Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; Fts5StructureLevel *pLvlOut; Fts5MultiSegIter *pIter = 0; /* Iterator to read input data */ int nRem = pnRem ? *pnRem : 0; /* Output leaf pages left to write */ int nInput; /* Number of input segments */ Fts5SegWriter writer; /* Writer object */ Fts5StructureSegment *pSeg; /* Output segment */ Fts5Buffer term; int bRequireDoclistTerm = 0; /* Doclist terminator (0x00) required */ int bOldest; /* True if the output segment is the oldest */ assert( iLvlnLevel ); assert( pLvl->nMerge<=pLvl->nSeg ); memset(&writer, 0, sizeof(Fts5SegWriter)); memset(&term, 0, sizeof(Fts5Buffer)); if( pLvl->nMerge ){ pLvlOut = &pStruct->aLevel[iLvl+1]; assert( pLvlOut->nSeg>0 ); nInput = pLvl->nMerge; fts5WriteInitForAppend(p, &writer, &pLvlOut->aSeg[pLvlOut->nSeg-1]); pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1]; }else{ int iSegid = fts5AllocateSegid(p, pStruct); /* Extend the Fts5Structure object as required to ensure the output ** segment exists. */ if( iLvl==pStruct->nLevel-1 ){ fts5StructureAddLevel(&p->rc, ppStruct); pStruct = *ppStruct; } fts5StructureExtendLevel(&p->rc, pStruct, iLvl+1, 1, 0); if( p->rc ) return; pLvl = &pStruct->aLevel[iLvl]; pLvlOut = &pStruct->aLevel[iLvl+1]; fts5WriteInit(p, &writer, iSegid); /* Add the new segment to the output level */ pSeg = &pLvlOut->aSeg[pLvlOut->nSeg]; pLvlOut->nSeg++; pSeg->pgnoFirst = 1; pSeg->iSegid = iSegid; pStruct->nSegment++; /* Read input from all segments in the input level */ nInput = pLvl->nSeg; } bOldest = (pLvlOut->nSeg==1 && pStruct->nLevel==iLvl+2); assert( iLvl>=0 ); for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, iLvl, nInput, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ Fts5SegIter *pSeg = &pIter->aSeg[ pIter->aFirst[1].iFirst ]; int nPos; /* position-list size field value */ int nTerm; const u8 *pTerm; /* Check for key annihilation. */ if( pSeg->nPos==0 && (bOldest || pSeg->bDel==0) ) continue; pTerm = fts5MultiIterTerm(pIter, &nTerm); if( nTerm!=term.n || memcmp(pTerm, term.p, nTerm) ){ if( pnRem && writer.nLeafWritten>nRem ){ break; } /* This is a new term. Append a term to the output segment. */ if( bRequireDoclistTerm ){ fts5WriteAppendZerobyte(p, &writer); } fts5WriteAppendTerm(p, &writer, nTerm, pTerm); fts5BufferSet(&p->rc, &term, nTerm, pTerm); bRequireDoclistTerm = 1; } /* Append the rowid to the output */ /* WRITEPOSLISTSIZE */ nPos = pSeg->nPos*2 + pSeg->bDel; fts5WriteAppendRowid(p, &writer, fts5MultiIterRowid(pIter), nPos); /* Append the position-list data to the output */ fts5ChunkIterate(p, pSeg, (void*)&writer, fts5MergeChunkCallback); } /* Flush the last leaf page to disk. Set the output segment b-tree height ** and last leaf page number at the same time. */ fts5WriteFinish(p, &writer, &pSeg->nHeight, &pSeg->pgnoLast); if( fts5MultiIterEof(p, pIter) ){ int i; /* Remove the redundant segments from the %_data table */ for(i=0; iaSeg[i].iSegid); } /* Remove the redundant segments from the input level */ if( pLvl->nSeg!=nInput ){ int nMove = (pLvl->nSeg - nInput) * sizeof(Fts5StructureSegment); memmove(pLvl->aSeg, &pLvl->aSeg[nInput], nMove); } pStruct->nSegment -= nInput; pLvl->nSeg -= nInput; pLvl->nMerge = 0; if( pSeg->pgnoLast==0 ){ pLvlOut->nSeg--; pStruct->nSegment--; } }else{ assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 ); fts5TrimSegments(p, pIter); pLvl->nMerge = nInput; } fts5MultiIterFree(p, pIter); fts5BufferFree(&term); if( pnRem ) *pnRem -= writer.nLeafWritten; } /* ** Do up to nPg pages of automerge work on the index. */ static void fts5IndexMerge( Fts5Index *p, /* FTS5 backend object */ Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nPg /* Pages of work to do */ ){ int nRem = nPg; Fts5Structure *pStruct = *ppStruct; while( nRem>0 && p->rc==SQLITE_OK ){ int iLvl; /* To iterate through levels */ int iBestLvl = 0; /* Level offering the most input segments */ int nBest = 0; /* Number of input segments on best level */ /* Set iBestLvl to the level to read input segments from. */ assert( pStruct->nLevel>0 ); for(iLvl=0; iLvlnLevel; iLvl++){ Fts5StructureLevel *pLvl = &pStruct->aLevel[iLvl]; if( pLvl->nMerge ){ if( pLvl->nMerge>nBest ){ iBestLvl = iLvl; nBest = pLvl->nMerge; } break; } if( pLvl->nSeg>nBest ){ nBest = pLvl->nSeg; iBestLvl = iLvl; } } /* If nBest is still 0, then the index must be empty. */ #ifdef SQLITE_DEBUG for(iLvl=0; nBest==0 && iLvlnLevel; iLvl++){ assert( pStruct->aLevel[iLvl].nSeg==0 ); } #endif if( nBestpConfig->nAutomerge && pStruct->aLevel[iBestLvl].nMerge==0 ){ break; } fts5IndexMergeLevel(p, &pStruct, iBestLvl, &nRem); if( p->rc==SQLITE_OK && pStruct->aLevel[iBestLvl].nMerge==0 ){ fts5StructurePromote(p, iBestLvl+1, pStruct); } } *ppStruct = pStruct; } /* ** A total of nLeaf leaf pages of data has just been flushed to a level-0 ** segment. This function updates the write-counter accordingly and, if ** necessary, performs incremental merge work. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ static void fts5IndexAutomerge( Fts5Index *p, /* FTS5 backend object */ Fts5Structure **ppStruct, /* IN/OUT: Current structure of index */ int nLeaf /* Number of output leaves just written */ ){ if( p->rc==SQLITE_OK && p->pConfig->nAutomerge>0 ){ Fts5Structure *pStruct = *ppStruct; u64 nWrite; /* Initial value of write-counter */ int nWork; /* Number of work-quanta to perform */ int nRem; /* Number of leaf pages left to write */ /* Update the write-counter. While doing so, set nWork. */ nWrite = pStruct->nWriteCounter; nWork = (int)(((nWrite + nLeaf) / p->nWorkUnit) - (nWrite / p->nWorkUnit)); pStruct->nWriteCounter += nLeaf; nRem = (int)(p->nWorkUnit * nWork * pStruct->nLevel); fts5IndexMerge(p, ppStruct, nRem); } } static void fts5IndexCrisismerge( Fts5Index *p, /* FTS5 backend object */ Fts5Structure **ppStruct /* IN/OUT: Current structure of index */ ){ const int nCrisis = p->pConfig->nCrisisMerge; Fts5Structure *pStruct = *ppStruct; int iLvl = 0; assert( p->rc!=SQLITE_OK || pStruct->nLevel>0 ); while( p->rc==SQLITE_OK && pStruct->aLevel[iLvl].nSeg>=nCrisis ){ fts5IndexMergeLevel(p, &pStruct, iLvl, 0); fts5StructurePromote(p, iLvl+1, pStruct); iLvl++; } *ppStruct = pStruct; } static int fts5IndexReturn(Fts5Index *p){ int rc = p->rc; p->rc = SQLITE_OK; return rc; } typedef struct Fts5FlushCtx Fts5FlushCtx; struct Fts5FlushCtx { Fts5Index *pIdx; Fts5SegWriter writer; }; /* ** Buffer aBuf[] contains a list of varints, all small enough to fit ** in a 32-bit integer. Return the size of the largest prefix of this ** list nMax bytes or less in size. */ static int fts5PoslistPrefix(const u8 *aBuf, int nMax){ int ret; u32 dummy; ret = fts5GetVarint32(aBuf, dummy); while( 1 ){ int i = fts5GetVarint32(&aBuf[ret], dummy); if( (ret + i) > nMax ) break; ret += i; } return ret; } #define fts5BufferSafeAppendBlob(pBuf, pBlob, nBlob) { \ assert( pBuf->nSpace>=(pBuf->n+nBlob) ); \ memcpy(&pBuf->p[pBuf->n], pBlob, nBlob); \ pBuf->n += nBlob; \ } /* ** Flush the contents of in-memory hash table iHash to a new level-0 ** segment on disk. Also update the corresponding structure record. ** ** If an error occurs, set the Fts5Index.rc error code. If an error has ** already occurred, this function is a no-op. */ static void fts5FlushOneHash(Fts5Index *p){ Fts5Hash *pHash = p->pHash; Fts5Structure *pStruct; int iSegid; int pgnoLast = 0; /* Last leaf page number in segment */ /* Obtain a reference to the index structure and allocate a new segment-id ** for the new level-0 segment. */ pStruct = fts5StructureRead(p); iSegid = fts5AllocateSegid(p, pStruct); if( iSegid ){ const int pgsz = p->pConfig->pgsz; Fts5StructureSegment *pSeg; /* New segment within pStruct */ int nHeight; /* Height of new segment b-tree */ Fts5Buffer *pBuf; /* Buffer in which to assemble leaf page */ const u8 *zPrev = 0; Fts5SegWriter writer; fts5WriteInit(p, &writer, iSegid); /* Pre-allocate the buffer used to assemble leaf pages to the target ** page size. */ assert( pgsz>0 ); pBuf = &writer.aWriter[0].buf; fts5BufferGrow(&p->rc, pBuf, pgsz + 20); /* Begin scanning through hash table entries. This loop runs once for each ** term/doclist currently stored within the hash table. */ if( p->rc==SQLITE_OK ){ memset(pBuf->p, 0, 4); pBuf->n = 4; p->rc = sqlite3Fts5HashScanInit(pHash, 0, 0); } while( p->rc==SQLITE_OK && 0==sqlite3Fts5HashScanEof(pHash) ){ const char *zTerm; /* Buffer containing term */ int nTerm; /* Size of zTerm in bytes */ const u8 *pDoclist; /* Pointer to doclist for this term */ int nDoclist; /* Size of doclist in bytes */ int nSuffix; /* Size of term suffix */ sqlite3Fts5HashScanEntry(pHash, &zTerm, &pDoclist, &nDoclist); nTerm = strlen(zTerm); /* Decide if the term will fit on the current leaf. If it will not, ** flush the leaf to disk here. */ if( (pBuf->n + nTerm + 2) > pgsz ){ fts5WriteFlushLeaf(p, &writer); pBuf = &writer.aWriter[0].buf; if( (nTerm + 32) > pBuf->nSpace ){ fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n); if( p->rc ) break; } } /* Write the term to the leaf. And if it is the first on the leaf, and ** the leaf is not page number 1, push it up into the b-tree hierarchy ** as well. */ if( writer.bFirstTermInPage==0 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nPre); nSuffix = nTerm - nPre; }else{ fts5PutU16(&pBuf->p[2], pBuf->n); writer.bFirstTermInPage = 0; if( writer.aWriter[0].pgno!=1 ){ int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm); fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm); pBuf = &writer.aWriter[0].buf; assert( nPren += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], nSuffix); fts5BufferSafeAppendBlob(pBuf, (const u8*)&zTerm[nTerm-nSuffix], nSuffix); /* We just wrote a term into page writer.aWriter[0].pgno. If a ** doclist-index is to be generated for this doclist, it will be ** associated with this page. */ assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 ); writer.aDlidx[0].pgno = writer.aWriter[0].pgno; if( pgsz>=(pBuf->n + nDoclist + 1) ){ /* The entire doclist will fit on the current leaf. */ fts5BufferSafeAppendBlob(pBuf, pDoclist, nDoclist); }else{ i64 iRowid = 0; i64 iDelta = 0; int iOff = 0; writer.bFirstRowidInPage = 0; /* The entire doclist will not fit on this leaf. The following ** loop iterates through the poslists that make up the current ** doclist. */ while( p->rc==SQLITE_OK && iOffp[0], pBuf->n); /* first docid on page */ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iRowid); writer.bFirstRowidInPage = 0; fts5WriteDlidxAppend(p, &writer, iRowid); }else{ pBuf->n += sqlite3Fts5PutVarint(&pBuf->p[pBuf->n], iDelta); } assert( pBuf->n<=pBuf->nSpace ); if( (pBuf->n + nCopy) <= pgsz ){ /* The entire poslist will fit on the current leaf. So copy ** it in one go. */ fts5BufferSafeAppendBlob(pBuf, &pDoclist[iOff], nCopy); }else{ /* The entire poslist will not fit on this leaf. So it needs ** to be broken into sections. The only qualification being ** that each varint must be stored contiguously. */ const u8 *pPoslist = &pDoclist[iOff]; int iPos = 0; while( p->rc==SQLITE_OK ){ int nSpace = pgsz - pBuf->n; int n = 0; if( (nCopy - iPos)<=nSpace ){ n = nCopy - iPos; }else{ n = fts5PoslistPrefix(&pPoslist[iPos], nSpace); } assert( n>0 ); fts5BufferSafeAppendBlob(pBuf, &pPoslist[iPos], n); iPos += n; if( pBuf->n>=pgsz ){ fts5WriteFlushLeaf(p, &writer); pBuf = &writer.aWriter[0].buf; } if( iPos>=nCopy ) break; } } iOff += nCopy; } } pBuf->p[pBuf->n++] = '\0'; assert( pBuf->n<=pBuf->nSpace ); zPrev = (const u8*)zTerm; sqlite3Fts5HashScanNext(pHash); } sqlite3Fts5HashClear(pHash); fts5WriteFinish(p, &writer, &nHeight, &pgnoLast); /* Update the Fts5Structure. It is written back to the database by the ** fts5StructureRelease() call below. */ if( pStruct->nLevel==0 ){ fts5StructureAddLevel(&p->rc, &pStruct); } fts5StructureExtendLevel(&p->rc, pStruct, 0, 1, 0); if( p->rc==SQLITE_OK ){ pSeg = &pStruct->aLevel[0].aSeg[ pStruct->aLevel[0].nSeg++ ]; pSeg->iSegid = iSegid; pSeg->nHeight = nHeight; pSeg->pgnoFirst = 1; pSeg->pgnoLast = pgnoLast; pStruct->nSegment++; } fts5StructurePromote(p, 0, pStruct); } fts5IndexAutomerge(p, &pStruct, pgnoLast); fts5IndexCrisismerge(p, &pStruct); fts5StructureWrite(p, pStruct); fts5StructureRelease(pStruct); } /* ** Flush any data stored in the in-memory hash tables to the database. */ static void fts5IndexFlush(Fts5Index *p){ /* Unless it is empty, flush the hash table to disk */ if( p->nPendingData ){ assert( p->pHash ); p->nPendingData = 0; fts5FlushOneHash(p); } } int sqlite3Fts5IndexOptimize(Fts5Index *p){ Fts5Structure *pStruct; Fts5Structure *pNew = 0; int nSeg = 0; assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); pStruct = fts5StructureRead(p); if( pStruct ){ assert( pStruct->nSegment==fts5StructureCountSegments(pStruct) ); nSeg = pStruct->nSegment; if( nSeg>1 ){ int nByte = sizeof(Fts5Structure); nByte += (pStruct->nLevel+1) * sizeof(Fts5StructureLevel); pNew = (Fts5Structure*)sqlite3Fts5MallocZero(&p->rc, nByte); } } if( pNew ){ Fts5StructureLevel *pLvl; int nByte = nSeg * sizeof(Fts5StructureSegment); pNew->nLevel = pStruct->nLevel+1; pNew->nWriteCounter = pStruct->nWriteCounter; pLvl = &pNew->aLevel[pStruct->nLevel]; pLvl->aSeg = (Fts5StructureSegment*)sqlite3Fts5MallocZero(&p->rc, nByte); if( pLvl->aSeg ){ int iLvl, iSeg; int iSegOut = 0; for(iLvl=0; iLvlnLevel; iLvl++){ for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ pLvl->aSeg[iSegOut] = pStruct->aLevel[iLvl].aSeg[iSeg]; iSegOut++; } } pNew->nSegment = pLvl->nSeg = nSeg; }else{ sqlite3_free(pNew); pNew = 0; } } if( pNew ){ int iLvl = pNew->nLevel-1; while( p->rc==SQLITE_OK && pNew->aLevel[iLvl].nSeg>0 ){ int nRem = FTS5_OPT_WORK_UNIT; fts5IndexMergeLevel(p, &pNew, iLvl, &nRem); } fts5StructureWrite(p, pNew); fts5StructureRelease(pNew); } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } int sqlite3Fts5IndexMerge(Fts5Index *p, int nMerge){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); if( pStruct && pStruct->nLevel ){ fts5IndexMerge(p, &pStruct, nMerge); fts5StructureWrite(p, pStruct); } fts5StructureRelease(pStruct); return fts5IndexReturn(p); } static void fts5PoslistCallback( Fts5Index *p, void *pCtx, const u8 *pChunk, int nChunk ){ fts5BufferAppendBlob(&p->rc, (Fts5Buffer*)pCtx, nChunk, pChunk); } /* ** Iterator pIter currently points to a valid entry (not EOF). This ** function appends the position list data for the current entry to ** buffer pBuf. It does not make a copy of the position-list size ** field. */ static void fts5SegiterPoslist( Fts5Index *p, Fts5SegIter *pSeg, Fts5Buffer *pBuf ){ fts5ChunkIterate(p, pSeg, (void*)pBuf, fts5PoslistCallback); } /* ** Iterator pMulti currently points to a valid entry (not EOF). This ** function appends a copy of the position-list of the entry pMulti ** currently points to to buffer pBuf. ** ** If an error occurs, an error code is left in p->rc. It is assumed ** no error has already occurred when this function is called. */ static void fts5MultiIterPoslist( Fts5Index *p, Fts5MultiSegIter *pMulti, int bSz, /* Append a size field before the data */ Fts5Buffer *pBuf ){ if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( fts5MultiIterEof(p, pMulti)==0 ); if( bSz ){ /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, pBuf, pSeg->nPos*2); } fts5SegiterPoslist(p, pSeg, pBuf); } } static void fts5DoclistIterNext(Fts5DoclistIter *pIter){ if( pIter->in ){ int bDummy; if( pIter->i ){ i64 iDelta; pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&iDelta); pIter->iRowid += iDelta; }else{ pIter->i += fts5GetVarint(&pIter->a[pIter->i], (u64*)&pIter->iRowid); } pIter->i += fts5GetPoslistSize( &pIter->a[pIter->i], &pIter->nPoslist, &bDummy ); pIter->aPoslist = &pIter->a[pIter->i]; pIter->i += pIter->nPoslist; }else{ pIter->aPoslist = 0; } } static void fts5DoclistIterInit( Fts5Buffer *pBuf, Fts5DoclistIter *pIter ){ memset(pIter, 0, sizeof(*pIter)); pIter->a = pBuf->p; pIter->n = pBuf->n; fts5DoclistIterNext(pIter); } /* ** Append a doclist to buffer pBuf. */ static void fts5MergeAppendDocid( int *pRc, /* IN/OUT: Error code */ Fts5Buffer *pBuf, /* Buffer to write to */ i64 *piLastRowid, /* IN/OUT: Previous rowid written (if any) */ i64 iRowid /* Rowid to append */ ){ if( pBuf->n==0 ){ fts5BufferAppendVarint(pRc, pBuf, iRowid); }else{ fts5BufferAppendVarint(pRc, pBuf, iRowid - *piLastRowid); } *piLastRowid = iRowid; } /* ** Buffers p1 and p2 contain doclists. This function merges the content ** of the two doclists together and sets buffer p1 to the result before ** returning. ** ** If an error occurs, an error code is left in p->rc. If an error has ** already occurred, this function is a no-op. */ static void fts5MergePrefixLists( Fts5Index *p, /* FTS5 backend object */ Fts5Buffer *p1, /* First list to merge */ Fts5Buffer *p2 /* Second list to merge */ ){ if( p2->n ){ i64 iLastRowid = 0; Fts5DoclistIter i1; Fts5DoclistIter i2; Fts5Buffer out; Fts5Buffer tmp; memset(&out, 0, sizeof(out)); memset(&tmp, 0, sizeof(tmp)); fts5DoclistIterInit(p1, &i1); fts5DoclistIterInit(p2, &i2); while( p->rc==SQLITE_OK && (i1.aPoslist!=0 || i2.aPoslist!=0) ){ if( i2.aPoslist==0 || (i1.aPoslist && i1.iRowidrc, &out, &iLastRowid, i1.iRowid); /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, i1.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i1.nPoslist, i1.aPoslist); fts5DoclistIterNext(&i1); } else if( i1.aPoslist==0 || i2.iRowid!=i1.iRowid ){ /* Copy entry from i2 */ fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, i2.nPoslist * 2); fts5BufferAppendBlob(&p->rc, &out, i2.nPoslist, i2.aPoslist); fts5DoclistIterNext(&i2); } else{ Fts5PoslistReader r1; Fts5PoslistReader r2; Fts5PoslistWriter writer; memset(&writer, 0, sizeof(writer)); /* Merge the two position lists. */ fts5MergeAppendDocid(&p->rc, &out, &iLastRowid, i2.iRowid); fts5BufferZero(&tmp); sqlite3Fts5PoslistReaderInit(-1, i1.aPoslist, i1.nPoslist, &r1); sqlite3Fts5PoslistReaderInit(-1, i2.aPoslist, i2.nPoslist, &r2); while( p->rc==SQLITE_OK && (r1.bEof==0 || r2.bEof==0) ){ i64 iNew; if( r2.bEof || (r1.bEof==0 && r1.iPosrc = sqlite3Fts5PoslistWriterAppend(&tmp, &writer, iNew); } /* WRITEPOSLISTSIZE */ fts5BufferAppendVarint(&p->rc, &out, tmp.n * 2); fts5BufferAppendBlob(&p->rc, &out, tmp.n, tmp.p); fts5DoclistIterNext(&i1); fts5DoclistIterNext(&i2); } } fts5BufferSet(&p->rc, p1, out.n, out.p); fts5BufferFree(&tmp); fts5BufferFree(&out); } } static void fts5BufferSwap(Fts5Buffer *p1, Fts5Buffer *p2){ Fts5Buffer tmp = *p1; *p1 = *p2; *p2 = tmp; } static void fts5SetupPrefixIter( Fts5Index *p, /* Index to read from */ int bDesc, /* True for "ORDER BY rowid DESC" */ const u8 *pToken, /* Buffer containing prefix to match */ int nToken, /* Size of buffer pToken in bytes */ Fts5IndexIter *pIter /* Populate this object */ ){ Fts5Structure *pStruct; Fts5Buffer *aBuf; const int nBuf = 32; aBuf = (Fts5Buffer*)fts5IdxMalloc(p, sizeof(Fts5Buffer)*nBuf); pStruct = fts5StructureRead(p); if( aBuf && pStruct ){ const int flags = FTS5INDEX_QUERY_SCAN; int i; i64 iLastRowid = 0; Fts5MultiSegIter *p1 = 0; /* Iterator used to gather data from index */ Fts5Data *pData; Fts5Buffer doclist; memset(&doclist, 0, sizeof(doclist)); for(fts5MultiIterNew(p, pStruct, 1, flags, pToken, nToken, -1, 0, &p1); fts5MultiIterEof(p, p1)==0; fts5MultiIterNext(p, p1, 0, 0) ){ i64 iRowid = fts5MultiIterRowid(p1); int nTerm; const u8 *pTerm = fts5MultiIterTerm(p1, &nTerm); assert( memcmp(pToken, pTerm, MIN(nToken, nTerm))<=0 ); if( nTerm0 && iRowid<=iLastRowid ){ for(i=0; p->rc==SQLITE_OK && doclist.n; i++){ assert( irc, &doclist, &iLastRowid, iRowid); fts5MultiIterPoslist(p, p1, 1, &doclist); } for(i=0; ip = (u8*)&pData[1]; pData->n = doclist.n; memcpy(pData->p, doclist.p, doclist.n); fts5MultiIterNew2(p, pData, bDesc, &pIter->pMulti); } fts5BufferFree(&doclist); } fts5StructureRelease(pStruct); sqlite3_free(aBuf); } /* ** Indicate that all subsequent calls to sqlite3Fts5IndexWrite() pertain ** to the document with rowid iRowid. */ int sqlite3Fts5IndexBeginWrite(Fts5Index *p, i64 iRowid){ assert( p->rc==SQLITE_OK ); /* Allocate the hash table if it has not already been allocated */ if( p->pHash==0 ){ p->rc = sqlite3Fts5HashNew(&p->pHash, &p->nPendingData); } /* Flush the hash table to disk if required */ if( iRowid<=p->iWriteRowid || (p->nPendingData > p->nMaxPendingData) ){ fts5IndexFlush(p); } p->iWriteRowid = iRowid; return fts5IndexReturn(p); } /* ** Commit data to disk. */ int sqlite3Fts5IndexSync(Fts5Index *p, int bCommit){ assert( p->rc==SQLITE_OK ); fts5IndexFlush(p); if( bCommit ) fts5CloseReader(p); return fts5IndexReturn(p); } /* ** Discard any data stored in the in-memory hash tables. Do not write it ** to the database. Additionally, assume that the contents of the %_data ** table may have changed on disk. So any in-memory caches of %_data ** records must be invalidated. */ int sqlite3Fts5IndexRollback(Fts5Index *p){ fts5CloseReader(p); fts5IndexDiscardData(p); assert( p->rc==SQLITE_OK ); return SQLITE_OK; } /* ** The %_data table is completely empty when this function is called. This ** function populates it with the initial structure objects for each index, ** and the initial version of the "averages" record (a zero-byte blob). */ int sqlite3Fts5IndexReinit(Fts5Index *p){ Fts5Structure s; assert( p->rc==SQLITE_OK ); p->rc = sqlite3Fts5IndexSetAverages(p, (const u8*)"", 0); memset(&s, 0, sizeof(Fts5Structure)); fts5StructureWrite(p, &s); return fts5IndexReturn(p); } /* ** Open a new Fts5Index handle. If the bCreate argument is true, create ** and initialize the underlying %_data table. ** ** If successful, set *pp to point to the new object and return SQLITE_OK. ** Otherwise, set *pp to NULL and return an SQLite error code. */ int sqlite3Fts5IndexOpen( Fts5Config *pConfig, int bCreate, Fts5Index **pp, char **pzErr ){ int rc = SQLITE_OK; Fts5Index *p; /* New object */ *pp = p = (Fts5Index*)sqlite3Fts5MallocZero(&rc, sizeof(Fts5Index)); if( rc==SQLITE_OK ){ p->pConfig = pConfig; p->nWorkUnit = FTS5_WORK_UNIT; p->nMaxPendingData = 1024*1024; p->zDataTbl = sqlite3Fts5Mprintf(&rc, "%s_data", pConfig->zName); if( p->zDataTbl && bCreate ){ rc = sqlite3Fts5CreateTable( pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr ); if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexReinit(p); } } } assert( rc!=SQLITE_OK || p->rc==SQLITE_OK ); if( rc ){ sqlite3Fts5IndexClose(p); *pp = 0; } return rc; } /* ** Close a handle opened by an earlier call to sqlite3Fts5IndexOpen(). */ int sqlite3Fts5IndexClose(Fts5Index *p){ int rc = SQLITE_OK; if( p ){ assert( p->pReader==0 ); sqlite3_finalize(p->pWriter); sqlite3_finalize(p->pDeleter); sqlite3Fts5HashFree(p->pHash); sqlite3Fts5BufferFree(&p->scratch); sqlite3_free(p->zDataTbl); sqlite3_free(p); } return rc; } /* ** Argument p points to a buffer containing utf-8 text that is n bytes in ** size. Return the number of bytes in the nChar character prefix of the ** buffer, or 0 if there are less than nChar characters in total. */ static int fts5IndexCharlenToBytelen(const char *p, int nByte, int nChar){ int n = 0; int i; for(i=0; i=nByte ) return 0; /* Input contains fewer than nChar chars */ if( (unsigned char)p[n++]>=0xc0 ){ while( (p[n] & 0xc0)==0x80 ) n++; } } return n; } /* ** pIn is a UTF-8 encoded string, nIn bytes in size. Return the number of ** unicode characters in the string. */ static int fts5IndexCharlen(const char *pIn, int nIn){ int nChar = 0; int i = 0; while( i=0xc0 ){ while( i delete) */ int iPos, /* Position of token within column */ const char *pToken, int nToken /* Token to add or remove to or from index */ ){ int i; /* Used to iterate through indexes */ int rc = SQLITE_OK; /* Return code */ Fts5Config *pConfig = p->pConfig; assert( p->rc==SQLITE_OK ); /* Add the entry to the main terms index. */ rc = sqlite3Fts5HashWrite( p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX, pToken, nToken ); for(i=0; inPrefix && rc==SQLITE_OK; i++){ int nByte = fts5IndexCharlenToBytelen(pToken, nToken, pConfig->aPrefix[i]); if( nByte ){ rc = sqlite3Fts5HashWrite(p->pHash, p->iWriteRowid, iCol, iPos, FTS5_MAIN_PREFIX+i+1, pToken, nByte ); } } return rc; } /* ** Open a new iterator to iterate though all docids that match the ** specified token or token prefix. */ int sqlite3Fts5IndexQuery( Fts5Index *p, /* FTS index to query */ const char *pToken, int nToken, /* Token (or prefix) to query for */ int flags, /* Mask of FTS5INDEX_QUERY_X flags */ Fts5IndexIter **ppIter /* OUT: New iterator object */ ){ Fts5Config *pConfig = p->pConfig; Fts5IndexIter *pRet; int iIdx = 0; Fts5Buffer buf = {0, 0, 0}; /* If the QUERY_SCAN flag is set, all other flags must be clear. */ assert( (flags & FTS5INDEX_QUERY_SCAN)==0 || (flags & FTS5INDEX_QUERY_SCAN)==FTS5INDEX_QUERY_SCAN ); if( sqlite3Fts5BufferGrow(&p->rc, &buf, nToken+1)==0 ){ memcpy(&buf.p[1], pToken, nToken); } #ifdef SQLITE_DEBUG if( flags & FTS5INDEX_QUERY_TEST_NOIDX ){ assert( flags & FTS5INDEX_QUERY_PREFIX ); iIdx = 1+pConfig->nPrefix; }else #endif if( flags & FTS5INDEX_QUERY_PREFIX ){ int nChar = fts5IndexCharlen(pToken, nToken); for(iIdx=1; iIdx<=pConfig->nPrefix; iIdx++){ if( pConfig->aPrefix[iIdx-1]==nChar ) break; } } pRet = (Fts5IndexIter*)sqlite3Fts5MallocZero(&p->rc, sizeof(Fts5IndexIter)); if( pRet ){ pRet->pIndex = p; if( iIdx<=pConfig->nPrefix ){ buf.p[0] = FTS5_MAIN_PREFIX + iIdx; pRet->pStruct = fts5StructureRead(p); if( pRet->pStruct ){ fts5MultiIterNew( p, pRet->pStruct, 1, flags, buf.p, nToken+1, -1, 0, &pRet->pMulti ); } }else{ int bDesc = (flags & FTS5INDEX_QUERY_DESC)!=0; buf.p[0] = FTS5_MAIN_PREFIX; fts5SetupPrefixIter(p, bDesc, buf.p, nToken+1, pRet); } } if( p->rc ){ sqlite3Fts5IterClose(pRet); pRet = 0; } *ppIter = pRet; sqlite3Fts5BufferFree(&buf); return fts5IndexReturn(p); } /* ** Return true if the iterator passed as the only argument is at EOF. */ int sqlite3Fts5IterEof(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); return fts5MultiIterEof(pIter->pIndex, pIter->pMulti); } /* ** Move to the next matching rowid. */ int sqlite3Fts5IterNext(Fts5IndexIter *pIter){ assert( pIter->pIndex->rc==SQLITE_OK ); fts5MultiIterNext(pIter->pIndex, pIter->pMulti, 0, 0); return fts5IndexReturn(pIter->pIndex); } /* ** Move to the next matching term/rowid. Used by the fts5vocab module. */ int sqlite3Fts5IterNextScan(Fts5IndexIter *pIter){ Fts5Index *p = pIter->pIndex; Fts5MultiSegIter *pMulti = pIter->pMulti; assert( pIter->pIndex->rc==SQLITE_OK ); assert( pMulti ); fts5MultiIterNext(p, pMulti, 0, 0); if( p->rc==SQLITE_OK ){ Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; if( pSeg->pLeaf && pSeg->term.p[0]!=FTS5_MAIN_PREFIX ){ fts5DataRelease(pSeg->pLeaf); pSeg->pLeaf = 0; } } return fts5IndexReturn(pIter->pIndex); } /* ** Move to the next matching rowid that occurs at or after iMatch. The ** definition of "at or after" depends on whether this iterator iterates ** in ascending or descending rowid order. */ int sqlite3Fts5IterNextFrom(Fts5IndexIter *pIter, i64 iMatch){ fts5MultiIterNextFrom(pIter->pIndex, pIter->pMulti, iMatch); return fts5IndexReturn(pIter->pIndex); } /* ** Return the current rowid. */ i64 sqlite3Fts5IterRowid(Fts5IndexIter *pIter){ return fts5MultiIterRowid(pIter->pMulti); } /* ** Return the current term. */ const char *sqlite3Fts5IterTerm(Fts5IndexIter *pIter, int *pn){ int n; const char *z = (const char*)fts5MultiIterTerm(pIter->pMulti, &n); *pn = n-1; return &z[1]; } /* ** Return a pointer to a buffer containing a copy of the position list for ** the current entry. Output variable *pn is set to the size of the buffer ** in bytes before returning. ** ** The returned position list does not include the "number of bytes" varint ** field that starts the position list on disk. */ int sqlite3Fts5IterPoslist( Fts5IndexIter *pIter, const u8 **pp, /* OUT: Pointer to position-list data */ int *pn, /* OUT: Size of position-list in bytes */ i64 *piRowid /* OUT: Current rowid */ ){ Fts5MultiSegIter *pMulti = pIter->pMulti; Fts5SegIter *pSeg = &pMulti->aSeg[ pMulti->aFirst[1].iFirst ]; assert( pIter->pIndex->rc==SQLITE_OK ); *piRowid = pSeg->iRowid; *pn = pSeg->nPos; if( pSeg->iLeafOffset+pSeg->nPos <= pSeg->pLeaf->n ){ *pp = &pSeg->pLeaf->p[pSeg->iLeafOffset]; }else{ fts5BufferZero(&pIter->poslist); fts5SegiterPoslist(pIter->pIndex, pSeg, &pIter->poslist); *pp = pIter->poslist.p; } return fts5IndexReturn(pIter->pIndex); } /* ** This function is similar to sqlite3Fts5IterPoslist(), except that it ** copies the position list into the buffer supplied as the second ** argument. */ int sqlite3Fts5IterPoslistBuffer(Fts5IndexIter *pIter, Fts5Buffer *pBuf){ Fts5Index *p = pIter->pIndex; Fts5MultiSegIter *pMulti = pIter->pMulti; assert( p->rc==SQLITE_OK ); fts5BufferZero(pBuf); fts5MultiIterPoslist(p, pMulti, 0, pBuf); return fts5IndexReturn(p); } /* ** Close an iterator opened by an earlier call to sqlite3Fts5IndexQuery(). */ void sqlite3Fts5IterClose(Fts5IndexIter *pIter){ if( pIter ){ fts5MultiIterFree(pIter->pIndex, pIter->pMulti); fts5StructureRelease(pIter->pStruct); fts5BufferFree(&pIter->poslist); fts5CloseReader(pIter->pIndex); sqlite3_free(pIter); } } /* ** Read the "averages" record into the buffer supplied as the second ** argument. Return SQLITE_OK if successful, or an SQLite error code ** if an error occurs. */ int sqlite3Fts5IndexGetAverages(Fts5Index *p, Fts5Buffer *pBuf){ assert( p->rc==SQLITE_OK ); fts5DataReadOrBuffer(p, pBuf, FTS5_AVERAGES_ROWID); return fts5IndexReturn(p); } /* ** Replace the current "averages" record with the contents of the buffer ** supplied as the second argument. */ int sqlite3Fts5IndexSetAverages(Fts5Index *p, const u8 *pData, int nData){ assert( p->rc==SQLITE_OK ); fts5DataWrite(p, FTS5_AVERAGES_ROWID, pData, nData); return fts5IndexReturn(p); } /* ** Return the total number of blocks this module has read from the %_data ** table since it was created. */ int sqlite3Fts5IndexReads(Fts5Index *p){ return p->nRead; } /* ** Set the 32-bit cookie value stored at the start of all structure ** records to the value passed as the second argument. ** ** Return SQLITE_OK if successful, or an SQLite error code if an error ** occurs. */ int sqlite3Fts5IndexSetCookie(Fts5Index *p, int iNew){ int rc; /* Return code */ Fts5Config *pConfig = p->pConfig; /* Configuration object */ u8 aCookie[4]; /* Binary representation of iNew */ sqlite3_blob *pBlob = 0; assert( p->rc==SQLITE_OK ); sqlite3Fts5Put32(aCookie, iNew); rc = sqlite3_blob_open(pConfig->db, pConfig->zDb, p->zDataTbl, "block", FTS5_STRUCTURE_ROWID, 1, &pBlob ); if( rc==SQLITE_OK ){ sqlite3_blob_write(pBlob, aCookie, 4, 0); rc = sqlite3_blob_close(pBlob); } return rc; } int sqlite3Fts5IndexLoadConfig(Fts5Index *p){ Fts5Structure *pStruct; pStruct = fts5StructureRead(p); fts5StructureRelease(pStruct); return fts5IndexReturn(p); } /************************************************************************* ************************************************************************** ** Below this point is the implementation of the integrity-check ** functionality. */ /* ** Return a simple checksum value based on the arguments. */ static u64 fts5IndexEntryCksum( i64 iRowid, int iCol, int iPos, int iIdx, const char *pTerm, int nTerm ){ int i; u64 ret = iRowid; ret += (ret<<3) + iCol; ret += (ret<<3) + iPos; if( iIdx>=0 ) ret += (ret<<3) + (FTS5_MAIN_PREFIX + iIdx); for(i=0; iaLvl[0]) * (pSeg->nHeight-1); memset(pIter, 0, sizeof(*pIter)); if( nByte ){ pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nByte); } if( p->rc==SQLITE_OK ){ pIter->nLvl = pSeg->nHeight-1; pIter->p = p; pIter->pSeg = pSeg; } for(i=0; p->rc==SQLITE_OK && inLvl; i++){ i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, i+1, 1); Fts5Data *pData; pIter->aLvl[i].pData = pData = fts5DataRead(p, iRowid); if( pData ){ fts5NodeIterInit(pData->p, pData->n, &pIter->aLvl[i].s); } } if( pIter->nLvl==0 || p->rc ){ pIter->bEof = 1; pIter->iLeaf = pSeg->pgnoLast; }else{ pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->iLeaf = pIter->aLvl[0].s.iChild; pIter->bDlidx = pIter->aLvl[0].s.bDlidx; } } static void fts5BtreeIterNext(Fts5BtreeIter *pIter){ Fts5Index *p = pIter->p; int i; assert( pIter->bEof==0 && pIter->aLvl[0].s.aData ); for(i=0; inLvl && p->rc==SQLITE_OK; i++){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; fts5NodeIterNext(&p->rc, &pLvl->s); if( pLvl->s.aData ){ fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p); break; }else{ fts5NodeIterFree(&pLvl->s); fts5DataRelease(pLvl->pData); pLvl->pData = 0; } } if( i==pIter->nLvl || p->rc ){ pIter->bEof = 1; }else{ int iSegid = pIter->pSeg->iSegid; for(i--; i>=0; i--){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; i64 iRowid = FTS5_SEGMENT_ROWID(iSegid, i+1, pLvl[1].s.iChild); pLvl->pData = fts5DataRead(p, iRowid); if( pLvl->pData ){ fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s); } } } pIter->nEmpty = pIter->aLvl[0].s.nEmpty; pIter->bDlidx = pIter->aLvl[0].s.bDlidx; pIter->iLeaf = pIter->aLvl[0].s.iChild; } static void fts5BtreeIterFree(Fts5BtreeIter *pIter){ int i; for(i=0; inLvl; i++){ Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i]; fts5NodeIterFree(&pLvl->s); if( pLvl->pData ){ fts5DataRelease(pLvl->pData); pLvl->pData = 0; } } sqlite3_free(pIter->aLvl); fts5BufferFree(&pIter->term); } #ifdef SQLITE_DEBUG /* ** This function is purely an internal test. It does not contribute to ** FTS functionality, or even the integrity-check, in any way. ** ** Instead, it tests that the same set of pgno/rowid combinations are ** visited regardless of whether the doclist-index identified by parameters ** iSegid/iLeaf is iterated in forwards or reverse order. */ static void fts5TestDlidxReverse( Fts5Index *p, int iSegid, /* Segment id to load from */ int iLeaf /* Load doclist-index for this leaf */ ){ Fts5DlidxIter *pDlidx = 0; u64 cksum1 = 13; u64 cksum2 = 13; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ i64 iRowid = fts5DlidxIterRowid(pDlidx); int pgno = fts5DlidxIterPgno(pDlidx); assert( pgno>iLeaf ); cksum1 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; for(pDlidx=fts5DlidxIterInit(p, 1, iSegid, iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterPrev(p, pDlidx) ){ i64 iRowid = fts5DlidxIterRowid(pDlidx); int pgno = fts5DlidxIterPgno(pDlidx); assert( fts5DlidxIterPgno(pDlidx)>iLeaf ); cksum2 += iRowid + ((i64)pgno<<32); } fts5DlidxIterFree(pDlidx); pDlidx = 0; if( p->rc==SQLITE_OK && cksum1!=cksum2 ) p->rc = FTS5_CORRUPT; } static int fts5QueryCksum( Fts5Index *p, /* Fts5 index object */ int iIdx, const char *z, /* Index key to query for */ int n, /* Size of index key in bytes */ int flags, /* Flags for Fts5IndexQuery */ u64 *pCksum /* IN/OUT: Checksum value */ ){ u64 cksum = *pCksum; Fts5IndexIter *pIdxIter = 0; int rc = sqlite3Fts5IndexQuery(p, z, n, flags, &pIdxIter); while( rc==SQLITE_OK && 0==sqlite3Fts5IterEof(pIdxIter) ){ i64 dummy; const u8 *pPos; int nPos; i64 rowid = sqlite3Fts5IterRowid(pIdxIter); rc = sqlite3Fts5IterPoslist(pIdxIter, &pPos, &nPos, &dummy); if( rc==SQLITE_OK ){ Fts5PoslistReader sReader; for(sqlite3Fts5PoslistReaderInit(-1, pPos, nPos, &sReader); sReader.bEof==0; sqlite3Fts5PoslistReaderNext(&sReader) ){ int iCol = FTS5_POS2COLUMN(sReader.iPos); int iOff = FTS5_POS2OFFSET(sReader.iPos); cksum ^= fts5IndexEntryCksum(rowid, iCol, iOff, iIdx, z, n); } rc = sqlite3Fts5IterNext(pIdxIter); } } sqlite3Fts5IterClose(pIdxIter); *pCksum = cksum; return rc; } /* ** This function is also purely an internal test. It does not contribute to ** FTS functionality, or even the integrity-check, in any way. */ static void fts5TestTerm( Fts5Index *p, Fts5Buffer *pPrev, /* Previous term */ const char *z, int n, /* Possibly new term to test */ u64 expected, u64 *pCksum ){ int rc = p->rc; if( pPrev->n==0 ){ fts5BufferSet(&rc, pPrev, n, (const u8*)z); }else if( rc==SQLITE_OK && (pPrev->n!=n || memcmp(pPrev->p, z, n)) ){ u64 cksum3 = *pCksum; const char *zTerm = (const char*)&pPrev->p[1]; /* term sans prefix-byte */ int nTerm = pPrev->n-1; /* Size of zTerm in bytes */ int iIdx = (pPrev->p[0] - FTS5_MAIN_PREFIX); int flags = (iIdx==0 ? 0 : FTS5INDEX_QUERY_PREFIX); int rc; u64 ck1 = 0; u64 ck2 = 0; /* Check that the results returned for ASC and DESC queries are ** the same. If not, call this corruption. */ rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, flags, &ck1); if( rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_DESC; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); } if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; /* If this is a prefix query, check that the results returned if the ** the index is disabled are the same. In both ASC and DESC order. */ if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX; ck2 = 0; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } if( iIdx>0 && rc==SQLITE_OK ){ int f = flags|FTS5INDEX_QUERY_TEST_NOIDX|FTS5INDEX_QUERY_DESC; ck2 = 0; rc = fts5QueryCksum(p, iIdx, zTerm, nTerm, f, &ck2); if( rc==SQLITE_OK && ck1!=ck2 ) rc = FTS5_CORRUPT; } cksum3 ^= ck1; fts5BufferSet(&rc, pPrev, n, (const u8*)z); if( rc==SQLITE_OK && cksum3!=expected ){ rc = FTS5_CORRUPT; } *pCksum = cksum3; } p->rc = rc; } #else # define fts5TestDlidxReverse(x,y,z) # define fts5TestTerm(u,v,w,x,y,z) #endif static void fts5IndexIntegrityCheckSegment( Fts5Index *p, /* FTS5 backend object */ Fts5StructureSegment *pSeg /* Segment to check internal consistency */ ){ Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */ if( pSeg->pgnoFirst==0 ) return; /* Iterate through the b-tree hierarchy. */ for(fts5BtreeIterInit(p, pSeg, &iter); p->rc==SQLITE_OK && iter.bEof==0; fts5BtreeIterNext(&iter) ){ i64 iRow; /* Rowid for this leaf */ Fts5Data *pLeaf; /* Data for this leaf */ int iOff; /* Offset of first term on leaf */ int i; /* Used to iterate through empty leaves */ /* If the leaf in question has already been trimmed from the segment, ** ignore this b-tree entry. Otherwise, load it into memory. */ if( iter.iLeafpgnoFirst ) continue; iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iter.iLeaf); pLeaf = fts5DataRead(p, iRow); if( pLeaf==0 ) break; /* Check that the leaf contains at least one term, and that it is equal ** to or larger than the split-key in iter.term. Also check that if there ** is also a rowid pointer within the leaf page header, it points to a ** location before the term. */ iOff = fts5GetU16(&pLeaf->p[2]); if( iOff==0 ){ p->rc = FTS5_CORRUPT; }else{ int iRowidOff; int nTerm; /* Size of term on leaf in bytes */ int res; /* Comparison of term and split-key */ iRowidOff = fts5GetU16(&pLeaf->p[0]); if( iRowidOff>=iOff ){ p->rc = FTS5_CORRUPT; }else{ iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm); res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n)); if( res==0 ) res = nTerm - iter.term.n; if( res<0 ) p->rc = FTS5_CORRUPT; } } fts5DataRelease(pLeaf); if( p->rc ) break; /* Now check that the iter.nEmpty leaves following the current leaf ** (a) exist and (b) contain no terms. */ for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){ pLeaf = fts5DataRead(p, iRow+i); if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){ p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } /* If there is a doclist-index, check that it looks right. */ if( iter.bDlidx ){ Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */ int iPrevLeaf = iter.iLeaf; int iSegid = pSeg->iSegid; int iPg; i64 iKey; for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf); fts5DlidxIterEof(p, pDlidx)==0; fts5DlidxIterNext(p, pDlidx) ){ /* Check any rowid-less pages that occur before the current leaf. */ for(iPg=iPrevLeaf+1; iPgp[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } iPrevLeaf = fts5DlidxIterPgno(pDlidx); /* Check that the leaf page indicated by the iterator really does ** contain the rowid suggested by the same. */ iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPrevLeaf); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ i64 iRowid; int iRowidOff = fts5GetU16(&pLeaf->p[0]); if( iRowidOff>=pLeaf->n ){ p->rc = FTS5_CORRUPT; }else{ fts5GetVarint(&pLeaf->p[iRowidOff], (u64*)&iRowid); if( iRowid!=fts5DlidxIterRowid(pDlidx) ) p->rc = FTS5_CORRUPT; } fts5DataRelease(pLeaf); } } for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){ iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg); pLeaf = fts5DataRead(p, iKey); if( pLeaf ){ if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT; fts5DataRelease(pLeaf); } } fts5DlidxIterFree(pDlidx); fts5TestDlidxReverse(p, iSegid, iter.iLeaf); } } /* Page iter.iLeaf must now be the rightmost leaf-page in the segment */ if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){ p->rc = FTS5_CORRUPT; } fts5BtreeIterFree(&iter); } /* ** Run internal checks to ensure that the FTS index (a) is internally ** consistent and (b) contains entries for which the XOR of the checksums ** as calculated by fts5IndexEntryCksum() is cksum. ** ** Return SQLITE_CORRUPT if any of the internal checks fail, or if the ** checksum does not match. Return SQLITE_OK if all checks pass without ** error, or some other SQLite error code if another error (e.g. OOM) ** occurs. */ int sqlite3Fts5IndexIntegrityCheck(Fts5Index *p, u64 cksum){ u64 cksum2 = 0; /* Checksum based on contents of indexes */ Fts5Buffer poslist = {0,0,0}; /* Buffer used to hold a poslist */ Fts5MultiSegIter *pIter; /* Used to iterate through entire index */ Fts5Structure *pStruct; /* Index structure */ /* Used by extra internal tests only run if NDEBUG is not defined */ u64 cksum3 = 0; /* Checksum based on contents of indexes */ Fts5Buffer term = {0,0,0}; /* Buffer used to hold most recent term */ /* Load the FTS index structure */ pStruct = fts5StructureRead(p); /* Check that the internal nodes of each segment match the leaves */ if( pStruct ){ int iLvl, iSeg; for(iLvl=0; iLvlnLevel; iLvl++){ for(iSeg=0; iSegaLevel[iLvl].nSeg; iSeg++){ Fts5StructureSegment *pSeg = &pStruct->aLevel[iLvl].aSeg[iSeg]; fts5IndexIntegrityCheckSegment(p, pSeg); } } } /* The cksum argument passed to this function is a checksum calculated ** based on all expected entries in the FTS index (including prefix index ** entries). This block checks that a checksum calculated based on the ** actual contents of FTS index is identical. ** ** Two versions of the same checksum are calculated. The first (stack ** variable cksum2) based on entries extracted from the full-text index ** while doing a linear scan of each individual index in turn. ** ** As each term visited by the linear scans, a separate query for the ** same term is performed. cksum3 is calculated based on the entries ** extracted by these queries. */ for(fts5MultiIterNew(p, pStruct, 0, 0, 0, 0, -1, 0, &pIter); fts5MultiIterEof(p, pIter)==0; fts5MultiIterNext(p, pIter, 0, 0) ){ int n; /* Size of term in bytes */ i64 iPos = 0; /* Position read from poslist */ int iOff = 0; /* Offset within poslist */ i64 iRowid = fts5MultiIterRowid(pIter); char *z = (char*)fts5MultiIterTerm(pIter, &n); poslist.n = 0; fts5MultiIterPoslist(p, pIter, 0, &poslist); while( 0==sqlite3Fts5PoslistNext64(poslist.p, poslist.n, &iOff, &iPos) ){ int iCol = FTS5_POS2COLUMN(iPos); int iTokOff = FTS5_POS2OFFSET(iPos); cksum2 ^= fts5IndexEntryCksum(iRowid, iCol, iTokOff, -1, z, n); } /* If this is a new term, query for it. Update cksum3 with the results. */ fts5TestTerm(p, &term, z, n, cksum2, &cksum3); } fts5TestTerm(p, &term, 0, 0, cksum2, &cksum3); fts5MultiIterFree(p, pIter); if( p->rc==SQLITE_OK && cksum!=cksum2 ) p->rc = FTS5_CORRUPT; fts5StructureRelease(pStruct); fts5BufferFree(&term); fts5BufferFree(&poslist); return fts5IndexReturn(p); } /* ** Calculate and return a checksum that is the XOR of the index entry ** checksum of all entries that would be generated by the token specified ** by the final 5 arguments. */ u64 sqlite3Fts5IndexCksum( Fts5Config *pConfig, /* Configuration object */ i64 iRowid, /* Document term appears in */ int iCol, /* Column term appears in */ int iPos, /* Position term appears in */ const char *pTerm, int nTerm /* Term at iPos */ ){ u64 ret = 0; /* Return value */ int iIdx; /* For iterating through indexes */ ret = fts5IndexEntryCksum(iRowid, iCol, iPos, 0, pTerm, nTerm); for(iIdx=0; iIdxnPrefix; iIdx++){ int nByte = fts5IndexCharlenToBytelen(pTerm, nTerm, pConfig->aPrefix[iIdx]); if( nByte ){ ret ^= fts5IndexEntryCksum(iRowid, iCol, iPos, iIdx+1, pTerm, nByte); } } return ret; } /************************************************************************* ************************************************************************** ** Below this point is the implementation of the fts5_decode() scalar ** function only. */ /* ** Decode a segment-data rowid from the %_data table. This function is ** the opposite of macro FTS5_SEGMENT_ROWID(). */ static void fts5DecodeRowid( i64 iRowid, /* Rowid from %_data table */ int *piSegid, /* OUT: Segment id */ int *pbDlidx, /* OUT: Dlidx flag */ int *piHeight, /* OUT: Height */ int *piPgno /* OUT: Page number */ ){ *piPgno = (int)(iRowid & (((i64)1 << FTS5_DATA_PAGE_B) - 1)); iRowid >>= FTS5_DATA_PAGE_B; *piHeight = (int)(iRowid & (((i64)1 << FTS5_DATA_HEIGHT_B) - 1)); iRowid >>= FTS5_DATA_HEIGHT_B; *pbDlidx = (int)(iRowid & 0x0001); iRowid >>= FTS5_DATA_DLI_B; *piSegid = (int)(iRowid & (((i64)1 << FTS5_DATA_ID_B) - 1)); } static void fts5DebugRowid(int *pRc, Fts5Buffer *pBuf, i64 iKey){ int iSegid, iHeight, iPgno, bDlidx; /* Rowid compenents */ fts5DecodeRowid(iKey, &iSegid, &bDlidx, &iHeight, &iPgno); if( iSegid==0 ){ if( iKey==FTS5_AVERAGES_ROWID ){ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(averages) "); }else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(structure)"); } } else{ sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "(%ssegid=%d h=%d pgno=%d)", bDlidx ? "dlidx " : "", iSegid, iHeight, iPgno ); } } static void fts5DebugStructure( int *pRc, /* IN/OUT: error code */ Fts5Buffer *pBuf, Fts5Structure *p ){ int iLvl, iSeg; /* Iterate through levels, segments */ for(iLvl=0; iLvlnLevel; iLvl++){ Fts5StructureLevel *pLvl = &p->aLevel[iLvl]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {lvl=%d nMerge=%d", iLvl, pLvl->nMerge ); for(iSeg=0; iSegnSeg; iSeg++){ Fts5StructureSegment *pSeg = &pLvl->aSeg[iSeg]; sqlite3Fts5BufferAppendPrintf(pRc, pBuf, " {id=%d h=%d leaves=%d..%d}", pSeg->iSegid, pSeg->nHeight, pSeg->pgnoFirst, pSeg->pgnoLast ); } sqlite3Fts5BufferAppendPrintf(pRc, pBuf, "}"); } } /* ** This is part of the fts5_decode() debugging aid. ** ** Arguments pBlob/nBlob contain a serialized Fts5Structure object. This ** function appends a human-readable representation of the same object ** to the buffer passed as the second argument. */ static void fts5DecodeStructure( int *pRc, /* IN/OUT: error code */ Fts5Buffer *pBuf, const u8 *pBlob, int nBlob ){ int rc; /* Return code */ Fts5Structure *p = 0; /* Decoded structure object */ rc = fts5StructureDecode(pBlob, nBlob, 0, &p); if( rc!=SQLITE_OK ){ *pRc = rc; return; } fts5DebugStructure(pRc, pBuf, p); fts5StructureRelease(p); } /* ** Buffer (a/n) is assumed to contain a list of serialized varints. Read ** each varint and append its string representation to buffer pBuf. Return ** after either the input buffer is exhausted or a 0 value is read. ** ** The return value is the number of bytes read from the input buffer. */ static int fts5DecodePoslist(int *pRc, Fts5Buffer *pBuf, const u8 *a, int n){ int iOff = 0; while( iOff=4 ){ iRowidOff = fts5GetU16(&a[0]); iTermOff = fts5GetU16(&a[2]); }else{ sqlite3Fts5BufferSet(&rc, &s, 8, (const u8*)"corrupt"); goto decode_out; } if( iRowidOff ){ iOff = iRowidOff; }else if( iTermOff ){ iOff = iTermOff; }else{ iOff = n; } fts5DecodePoslist(&rc, &s, &a[4], iOff-4); assert( iRowidOff==0 || iOff==iRowidOff ); if( iRowidOff ){ iOff += fts5DecodeDoclist(&rc, &s, &a[iOff], n-iOff); } assert( iTermOff==0 || iOff==iTermOff ); while( iOff