Use a WITHOUT ROWID table to index fts5 btree leaves. This is faster to query and only slightly larger than storing btree nodes within an intkey table.

FossilOrigin-Name: 862418e3506d4b7cca9c44d58c2eb9dc915d75c9
This commit is contained in:
dan 2015-07-15 19:46:02 +00:00
parent f4fa0b8073
commit e3229c19cb
7 changed files with 241 additions and 337 deletions

View File

@ -17,7 +17,7 @@
#include "fts5Int.h"
#define FTS5_DEFAULT_PAGE_SIZE 1000
#define FTS5_DEFAULT_PAGE_SIZE 4050
#define FTS5_DEFAULT_AUTOMERGE 4
#define FTS5_DEFAULT_CRISISMERGE 16

View File

@ -287,9 +287,8 @@ int sqlite3Fts5Corrupt() { return SQLITE_CORRUPT_VTAB; }
** without overreading if the records are corrupt.
*/
#define FTS5_DATA_ZERO_PADDING 8
#define FTS5_DATA_PADDING 20
typedef struct Fts5BtreeIter Fts5BtreeIter;
typedef struct Fts5BtreeIterLevel Fts5BtreeIterLevel;
typedef struct Fts5Data Fts5Data;
typedef struct Fts5DlidxIter Fts5DlidxIter;
typedef struct Fts5DlidxLvl Fts5DlidxLvl;
@ -333,6 +332,9 @@ struct Fts5Index {
sqlite3_blob *pReader; /* RO incr-blob open on %_data table */
sqlite3_stmt *pWriter; /* "INSERT ... %_data VALUES(?,?)" */
sqlite3_stmt *pDeleter; /* "DELETE FROM %_data ... id>=? AND id<=?" */
sqlite3_stmt *pIdxWriter; /* "INSERT ... %_idx VALUES(?,?,?,?)" */
sqlite3_stmt *pIdxDeleter; /* "DELETE FROM %_idx WHERE segid=?" */
sqlite3_stmt *pIdxSelect;
int nRead; /* Total number of blocks read */
};
@ -387,8 +389,7 @@ struct Fts5DlidxWriter {
};
struct Fts5SegWriter {
int iSegid; /* Segid to write to */
int nWriter; /* Number of entries in aWriter */
Fts5PageWriter *aWriter; /* Array of PageWriter objects */
Fts5PageWriter writer; /* PageWriter object */
i64 iPrevRowid; /* Previous docid written to current leaf */
u8 bFirstRowidInDoclist; /* True if next rowid is first in doclist */
u8 bFirstRowidInPage; /* True if next rowid is first in page */
@ -398,6 +399,10 @@ struct Fts5SegWriter {
int nDlidx; /* Allocated size of aDlidx[] array */
Fts5DlidxWriter *aDlidx; /* Array of Fts5DlidxWriter objects */
/* Values to insert into the %_idx table */
Fts5Buffer btterm; /* TODO: Docs */
int iBtPage; /* TODO: This */
};
/*
@ -570,43 +575,6 @@ struct Fts5DlidxIter {
/*
** An Fts5BtreeIter object is used to iterate through all entries in the
** b-tree hierarchy belonging to a single fts5 segment. In this case the
** "b-tree hierarchy" is all b-tree nodes except leaves. Each entry in the
** b-tree hierarchy consists of the following:
**
** iLeaf: The page number of the leaf page the entry points to.
**
** term: A split-key that all terms on leaf page $iLeaf must be greater
** than or equal to. The "term" associated with the first b-tree
** hierarchy entry (the one that points to leaf page 1) is always
** an empty string.
**
** nEmpty: The number of empty (termless) leaf pages that immediately
** follow iLeaf.
**
** The Fts5BtreeIter object is only used as part of the integrity-check code.
*/

/*
** State for a single level of the b-tree hierarchy being traversed by an
** Fts5BtreeIter. Entry aLvl[i] of the owning iterator is loaded from the
** node at height (i+1) of the segment - see fts5BtreeIterInit() and
** fts5BtreeIterNext().
*/
struct Fts5BtreeIterLevel {
Fts5NodeIter s; /* Iterator for the current node */
Fts5Data *pData; /* Data for the current node */
};
/*
** The iterator itself. The "output variables" are refreshed from aLvl[0].s
** each time the iterator is initialized or advanced.
*/
struct Fts5BtreeIter {
Fts5Index *p; /* FTS5 backend object */
Fts5StructureSegment *pSeg; /* Iterate through this segment's b-tree */
int nLvl; /* Size of aLvl[] array */
Fts5BtreeIterLevel *aLvl; /* Level for each tier of b-tree */
/* Output variables */
Fts5Buffer term; /* Current term */
int iLeaf; /* Leaf containing terms >= current term */
int nEmpty; /* Number of "empty" leaves following iLeaf */
int bEof; /* Set to true at EOF */
int bDlidx; /* True if there exists a dlidx */
};
/*
** The first argument passed to this macro is a pointer to an Fts5Buffer
** object.
@ -748,7 +716,7 @@ static Fts5Data *fts5DataReadOrBuffer(
rc = SQLITE_NOMEM;
}
}else{
int nSpace = nByte + FTS5_DATA_ZERO_PADDING;
int nSpace = nByte + FTS5_DATA_PADDING;
pRet = (Fts5Data*)sqlite3_malloc(nSpace+sizeof(Fts5Data));
if( pRet ){
pRet->n = nByte;
@ -805,6 +773,23 @@ static void fts5DataRelease(Fts5Data *pData){
sqlite3_free(pData);
}
/*
** Compile the SQL text zSql and store the resulting statement handle
** in *ppStmt.
**
** The statement is only compiled if no error has been recorded in p->rc
** when this function is called. In that case:
**
**   * if zSql is NULL, p->rc is set to SQLITE_NOMEM. This lets callers pass
**     the result of sqlite3_mprintf() directly without checking it.
**
**   * otherwise p->rc is set to the value returned by sqlite3_prepare_v2().
**
** If p->rc already holds an error, no compilation is attempted and
** *ppStmt is not written by this function.
**
** In all cases zSql is released using sqlite3_free() before returning, so
** the caller must not use or free it afterwards. The return value is the
** final value of p->rc.
*/
static int fts5IndexPrepareStmt(
  Fts5Index *p,                   /* FTS5 backend object */
  sqlite3_stmt **ppStmt,          /* OUT: Compiled statement */
  char *zSql                      /* SQL text. Freed by this function */
){
  if( p->rc==SQLITE_OK ){
    int rc = SQLITE_NOMEM;
    if( zSql!=0 ){
      rc = sqlite3_prepare_v2(p->pConfig->db, zSql, -1, ppStmt, 0);
    }
    p->rc = rc;
  }

  sqlite3_free(zSql);
  return p->rc;
}
/*
** INSERT OR REPLACE a record into the %_data table.
*/
@ -814,17 +799,11 @@ static void fts5DataWrite(Fts5Index *p, i64 iRowid, const u8 *pData, int nData){
if( p->pWriter==0 ){
int rc = SQLITE_OK;
Fts5Config *pConfig = p->pConfig;
char *zSql = sqlite3Fts5Mprintf(&rc,
"REPLACE INTO '%q'.%Q(id, block) VALUES(?,?)", pConfig->zDb, p->zDataTbl
);
if( zSql ){
rc = sqlite3_prepare_v2(pConfig->db, zSql, -1, &p->pWriter, 0);
sqlite3_free(zSql);
}
if( rc!=SQLITE_OK ){
p->rc = rc;
return;
}
fts5IndexPrepareStmt(p, &p->pWriter, sqlite3_mprintf(
"REPLACE INTO '%q'.'%q_data'(id, block) VALUES(?,?)",
pConfig->zDb, pConfig->zName
));
if( p->rc ) return;
}
sqlite3_bind_int64(p->pWriter, 1, iRowid);
@ -845,7 +824,8 @@ static void fts5DataDelete(Fts5Index *p, i64 iFirst, i64 iLast){
int rc;
Fts5Config *pConfig = p->pConfig;
char *zSql = sqlite3_mprintf(
"DELETE FROM '%q'.%Q WHERE id>=? AND id<=?", pConfig->zDb, p->zDataTbl
"DELETE FROM '%q'.'%q_data' WHERE id>=? AND id<=?",
pConfig->zDb, pConfig->zName
);
if( zSql==0 ){
rc = SQLITE_NOMEM;
@ -872,6 +852,18 @@ static void fts5DataRemoveSegment(Fts5Index *p, int iSegid){
i64 iFirst = FTS5_SEGMENT_ROWID(iSegid, 0, 0);
i64 iLast = FTS5_SEGMENT_ROWID(iSegid+1, 0, 0)-1;
fts5DataDelete(p, iFirst, iLast);
if( p->pIdxDeleter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxDeleter, sqlite3_mprintf(
"DELETE FROM '%q'.'%q_idx' WHERE segid=?",
pConfig->zDb, pConfig->zName
));
}
if( p->rc==SQLITE_OK ){
sqlite3_bind_int(p->pIdxDeleter, 1, iSegid);
sqlite3_step(p->pIdxDeleter);
p->rc = sqlite3_reset(p->pIdxDeleter);
}
}
/*
@ -2334,12 +2326,22 @@ static void fts5SegIterSeekInit(
/* This block sets stack variable iPg to the leaf page number that may
** contain term (pTerm/nTerm), if it is present in the segment. */
for(h=pSeg->nHeight-1; h>0; h--){
i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, h, iPg);
fts5DataBuffer(p, pBuf, iRowid);
if( p->rc ) break;
iPg = fts5NodeSeek(pBuf, pTerm, nTerm, &bDlidx);
if( p->pIdxSelect==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxSelect, sqlite3_mprintf(
"SELECT pgno, dlidx FROM '%q'.'%q_idx' WHERE "
"segid=? AND term<=? ORDER BY term DESC LIMIT 1",
pConfig->zDb, pConfig->zName
));
}
if( p->rc ) return;
sqlite3_bind_int(p->pIdxSelect, 1, pSeg->iSegid);
sqlite3_bind_blob(p->pIdxSelect, 2, pTerm, nTerm, SQLITE_STATIC);
if( SQLITE_ROW==sqlite3_step(p->pIdxSelect) ){
iPg = sqlite3_column_int(p->pIdxSelect, 0);
bDlidx = sqlite3_column_int(p->pIdxSelect, 1);
}
p->rc = sqlite3_reset(p->pIdxSelect);
if( iPg<pSeg->pgnoFirst ){
iPg = pSeg->pgnoFirst;
@ -3170,52 +3172,52 @@ static int fts5WriteDlidxGrow(
}
/*
** If an "nEmpty" record must be written to the b-tree before the next
** term, write it now.
** If the current doclist-index accumulating in pWriter->aDlidx[] is large
** enough, flush it to disk and return 1. Otherwise discard it and return
** zero.
*/
static void fts5WriteBtreeNEmpty(Fts5Index *p, Fts5SegWriter *pWriter){
if( pWriter->nEmpty ){
int bFlag = 0;
Fts5PageWriter *pPg;
pPg = &pWriter->aWriter[1];
static int fts5WriteFlushDlidx(Fts5Index *p, Fts5SegWriter *pWriter){
int bFlag = 0;
/* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
** to the database, also write the doclist-index to disk. */
if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
bFlag = 1;
}
fts5WriteDlidxClear(p, pWriter, bFlag);
fts5BufferAppendVarint(&p->rc, &pPg->buf, bFlag);
fts5BufferAppendVarint(&p->rc, &pPg->buf, pWriter->nEmpty);
pWriter->nEmpty = 0;
}else{
fts5WriteDlidxClear(p, pWriter, 0);
/* If there were FTS5_MIN_DLIDX_SIZE or more empty leaf pages written
** to the database, also write the doclist-index to disk. */
if( pWriter->aDlidx[0].buf.n>0 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
bFlag = 1;
}
assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].buf.n==0 );
assert( pWriter->nDlidx==0 || pWriter->aDlidx[0].bPrevValid==0 );
fts5WriteDlidxClear(p, pWriter, bFlag);
pWriter->nEmpty = 0;
return bFlag;
}
static void fts5WriteBtreeGrow(Fts5Index *p, Fts5SegWriter *pWriter){
/*
** This function is called whenever processing of the doclist for the
** last term on leaf page (pWriter->iBtPage) is completed.
**
** The doclist-index for that term is currently stored in-memory within the
** Fts5SegWriter.aDlidx[] array. If it is large enough, this function
** writes it out to disk. Or, if it is too small to bother with, discards
** it.
**
** Fts5SegWriter.btterm currently contains the first term on page iBtPage.
*/
static void fts5WriteFlushBtree(Fts5Index *p, Fts5SegWriter *pWriter){
int bFlag;
assert( pWriter->iBtPage || pWriter->nEmpty==0 );
if( pWriter->iBtPage==0 ) return;
bFlag = fts5WriteFlushDlidx(p, pWriter);
if( p->rc==SQLITE_OK ){
Fts5PageWriter *aNew;
Fts5PageWriter *pNew;
int nNew = sizeof(Fts5PageWriter) * (pWriter->nWriter+1);
aNew = (Fts5PageWriter*)sqlite3_realloc(pWriter->aWriter, nNew);
if( aNew==0 ){
p->rc = SQLITE_NOMEM;
return;
}
pNew = &aNew[pWriter->nWriter];
memset(pNew, 0, sizeof(Fts5PageWriter));
pNew->pgno = 1;
fts5BufferAppendVarint(&p->rc, &pNew->buf, 1);
pWriter->nWriter++;
pWriter->aWriter = aNew;
const char *z = (pWriter->btterm.n>0?(const char*)pWriter->btterm.p:"");
/* The following was already done in fts5WriteInit(): */
/* sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid); */
sqlite3_bind_blob(p->pIdxWriter, 2, z, pWriter->btterm.n, SQLITE_STATIC);
sqlite3_bind_int(p->pIdxWriter, 3, pWriter->iBtPage);
sqlite3_bind_int(p->pIdxWriter, 4, bFlag);
sqlite3_step(p->pIdxWriter);
p->rc = sqlite3_reset(p->pIdxWriter);
}
pWriter->iBtPage = 0;
}
/*
@ -3232,36 +3234,9 @@ static void fts5WriteBtreeTerm(
Fts5SegWriter *pWriter, /* Writer object */
int nTerm, const u8 *pTerm /* First term on new page */
){
int iHeight;
for(iHeight=1; 1; iHeight++){
Fts5PageWriter *pPage;
if( iHeight>=pWriter->nWriter ){
fts5WriteBtreeGrow(p, pWriter);
if( p->rc ) return;
}
pPage = &pWriter->aWriter[iHeight];
fts5WriteBtreeNEmpty(p, pWriter);
if( pPage->buf.n>=p->pConfig->pgsz ){
/* pPage will be written to disk. The term will be written into the
** parent of pPage. */
i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, iHeight, pPage->pgno);
fts5DataWrite(p, iRowid, pPage->buf.p, pPage->buf.n);
fts5BufferZero(&pPage->buf);
fts5BufferZero(&pPage->term);
fts5BufferAppendVarint(&p->rc, &pPage->buf, pPage[-1].pgno);
pPage->pgno++;
}else{
int nPre = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nPre+2);
fts5BufferAppendVarint(&p->rc, &pPage->buf, nTerm-nPre);
fts5BufferAppendBlob(&p->rc, &pPage->buf, nTerm-nPre, pTerm+nPre);
fts5BufferSet(&p->rc, &pPage->term, nTerm, pTerm);
break;
}
}
fts5WriteFlushBtree(p, pWriter);
fts5BufferSet(&p->rc, &pWriter->btterm, nTerm, pTerm);
pWriter->iBtPage = pWriter->writer.pgno;
}
/*
@ -3345,7 +3320,7 @@ static void fts5WriteDlidxAppend(
if( pDlidx->bPrevValid ){
iVal = iRowid - pDlidx->iPrev;
}else{
i64 iPgno = (i==0 ? pWriter->aWriter[0].pgno : pDlidx[-1].pgno);
i64 iPgno = (i==0 ? pWriter->writer.pgno : pDlidx[-1].pgno);
assert( pDlidx->buf.n==0 );
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, !bDone);
sqlite3Fts5BufferAppendVarint(&p->rc, &pDlidx->buf, iPgno);
@ -3360,7 +3335,7 @@ static void fts5WriteDlidxAppend(
static void fts5WriteFlushLeaf(Fts5Index *p, Fts5SegWriter *pWriter){
static const u8 zero[] = { 0x00, 0x00, 0x00, 0x00 };
Fts5PageWriter *pPage = &pWriter->aWriter[0];
Fts5PageWriter *pPage = &pWriter->writer;
i64 iRowid;
if( pWriter->bFirstTermInPage ){
@ -3399,7 +3374,7 @@ static void fts5WriteAppendTerm(
int nTerm, const u8 *pTerm
){
int nPrefix; /* Bytes of prefix compression for term */
Fts5PageWriter *pPage = &pWriter->aWriter[0];
Fts5PageWriter *pPage = &pWriter->writer;
assert( pPage->buf.n==0 || pPage->buf.n>4 );
if( pPage->buf.n==0 ){
@ -3434,7 +3409,7 @@ static void fts5WriteAppendTerm(
n = 1 + fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
}
fts5WriteBtreeTerm(p, pWriter, n, pTerm);
pPage = &pWriter->aWriter[0];
pPage = &pWriter->writer;
}
}else{
nPrefix = fts5PrefixCompress(pPage->term.n, pPage->term.p, nTerm, pTerm);
@ -3472,7 +3447,7 @@ static void fts5WriteAppendRowid(
int nPos
){
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pPage = &pWriter->aWriter[0];
Fts5PageWriter *pPage = &pWriter->writer;
/* If this is to be the first docid written to the page, set the
** docid-pointer in the page-header. Also append a value to the dlidx
@ -3507,7 +3482,7 @@ static void fts5WriteAppendPoslistData(
const u8 *aData,
int nData
){
Fts5PageWriter *pPage = &pWriter->aWriter[0];
Fts5PageWriter *pPage = &pWriter->writer;
const u8 *a = aData;
int n = nData;
@ -3530,7 +3505,7 @@ static void fts5WriteAppendPoslistData(
}
static void fts5WriteAppendZerobyte(Fts5Index *p, Fts5SegWriter *pWriter){
fts5BufferAppendVarint(&p->rc, &pWriter->aWriter[0].buf, 0);
fts5BufferAppendVarint(&p->rc, &pWriter->writer.buf, 0);
}
/*
@ -3544,8 +3519,8 @@ static void fts5WriteFinish(
int *pnLeaf /* OUT: Number of leaf pages in b-tree */
){
int i;
Fts5PageWriter *pLeaf = &pWriter->writer;
if( p->rc==SQLITE_OK ){
Fts5PageWriter *pLeaf = &pWriter->aWriter[0];
if( pLeaf->pgno==1 && pLeaf->buf.n==0 ){
*pnLeaf = 0;
*pnHeight = 0;
@ -3554,29 +3529,14 @@ static void fts5WriteFinish(
fts5WriteFlushLeaf(p, pWriter);
}
*pnLeaf = pLeaf->pgno-1;
if( pWriter->nWriter==1 && pWriter->nEmpty>=FTS5_MIN_DLIDX_SIZE ){
fts5WriteBtreeGrow(p, pWriter);
}
if( pWriter->nWriter>1 ){
fts5WriteBtreeNEmpty(p, pWriter);
}
*pnHeight = pWriter->nWriter;
for(i=1; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
fts5DataWrite(p,
FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pPg->pgno),
pPg->buf.p, pPg->buf.n
);
}
fts5WriteFlushBtree(p, pWriter);
*pnHeight = 0;
}
}
for(i=0; i<pWriter->nWriter; i++){
Fts5PageWriter *pPg = &pWriter->aWriter[i];
fts5BufferFree(&pPg->term);
fts5BufferFree(&pPg->buf);
}
sqlite3_free(pWriter->aWriter);
fts5BufferFree(&pLeaf->term);
fts5BufferFree(&pLeaf->buf);
fts5BufferFree(&pWriter->btterm);
for(i=0; i<pWriter->nDlidx; i++){
sqlite3Fts5BufferFree(&pWriter->aDlidx[i].buf);
@ -3592,48 +3552,21 @@ static void fts5WriteInit(
memset(pWriter, 0, sizeof(Fts5SegWriter));
pWriter->iSegid = iSegid;
pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, sizeof(Fts5PageWriter));
if( fts5WriteDlidxGrow(p, pWriter, 1) ) return;
pWriter->nWriter = 1;
pWriter->nDlidx = 1;
pWriter->aWriter[0].pgno = 1;
fts5WriteDlidxGrow(p, pWriter, 1);
pWriter->writer.pgno = 1;
pWriter->bFirstTermInPage = 1;
}
pWriter->iBtPage = 1;
static void fts5WriteInitForAppend(
Fts5Index *p, /* FTS5 backend object */
Fts5SegWriter *pWriter, /* Writer to initialize */
Fts5StructureSegment *pSeg /* Segment object to append to */
){
int nByte = pSeg->nHeight * sizeof(Fts5PageWriter);
memset(pWriter, 0, sizeof(Fts5SegWriter));
pWriter->iSegid = pSeg->iSegid;
pWriter->aWriter = (Fts5PageWriter*)fts5IdxMalloc(p, nByte);
pWriter->aDlidx = (Fts5DlidxWriter*)fts5IdxMalloc(p, sizeof(Fts5DlidxWriter));
if( p->pIdxWriter==0 ){
Fts5Config *pConfig = p->pConfig;
fts5IndexPrepareStmt(p, &p->pIdxWriter, sqlite3_mprintf(
"INSERT INTO '%q'.'%q_idx'(segid,term,pgno,dlidx) VALUES(?,?,?,?)",
pConfig->zDb, pConfig->zName
));
}
if( p->rc==SQLITE_OK ){
int pgno = 1;
int i;
pWriter->nDlidx = 1;
pWriter->nWriter = pSeg->nHeight;
pWriter->aWriter[0].pgno = pSeg->pgnoLast+1;
for(i=pSeg->nHeight-1; i>0; i--){
i64 iRowid = FTS5_SEGMENT_ROWID(pWriter->iSegid, i, pgno);
Fts5PageWriter *pPg = &pWriter->aWriter[i];
pPg->pgno = pgno;
fts5DataBuffer(p, &pPg->buf, iRowid);
if( p->rc==SQLITE_OK ){
Fts5NodeIter ss;
fts5NodeIterInit(pPg->buf.p, pPg->buf.n, &ss);
while( ss.aData ) fts5NodeIterNext(&p->rc, &ss);
fts5BufferSet(&p->rc, &pPg->term, ss.term.n, ss.term.p);
pgno = ss.iChild;
fts5NodeIterFree(&ss);
}
}
assert( p->rc!=SQLITE_OK || (pgno+pWriter->nEmpty)==pSeg->pgnoLast );
pWriter->bFirstTermInPage = 1;
assert( pWriter->aWriter[0].term.n==0 );
sqlite3_bind_int(p->pIdxWriter, 1, pWriter->iSegid);
}
}
@ -3673,7 +3606,7 @@ static void fts5TrimSegments(Fts5Index *p, Fts5IndexIter *pIter){
fts5BufferAppendBlob(&p->rc, &buf, pData->n - iOff, &pData->p[iOff]);
fts5DataRelease(pData);
pSeg->pSeg->pgnoFirst = pSeg->iTermLeafPgno;
fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1),iLeafRowid);
fts5DataDelete(p, FTS5_SEGMENT_ROWID(iId, 0, 1), iLeafRowid);
fts5DataWrite(p, iLeafRowid, buf.p, buf.n);
}
}
@ -3720,8 +3653,11 @@ static void fts5IndexMergeLevel(
pLvlOut = &pStruct->aLevel[iLvl+1];
assert( pLvlOut->nSeg>0 );
nInput = pLvl->nMerge;
fts5WriteInitForAppend(p, &writer, &pLvlOut->aSeg[pLvlOut->nSeg-1]);
pSeg = &pLvlOut->aSeg[pLvlOut->nSeg-1];
fts5WriteInit(p, &writer, pSeg->iSegid);
writer.writer.pgno = pSeg->pgnoLast+1;
writer.iBtPage = 0;
}else{
int iSegid = fts5AllocateSegid(p, pStruct);
@ -3812,7 +3748,7 @@ static void fts5IndexMergeLevel(
pStruct->nSegment--;
}
}else{
assert( pSeg->nHeight>0 && pSeg->pgnoLast>0 );
assert( pSeg->pgnoLast>0 );
fts5TrimSegments(p, pIter);
pLvl->nMerge = nInput;
}
@ -3987,7 +3923,7 @@ static void fts5FlushOneHash(Fts5Index *p){
/* Pre-allocate the buffer used to assemble leaf pages to the target
** page size. */
assert( pgsz>0 );
pBuf = &writer.aWriter[0].buf;
pBuf = &writer.writer.buf;
fts5BufferGrow(&p->rc, pBuf, pgsz + 20);
/* Begin scanning through hash table entries. This loop runs once for each
@ -4011,7 +3947,7 @@ static void fts5FlushOneHash(Fts5Index *p){
** flush the leaf to disk here. */
if( (pBuf->n + nTerm + 2) > pgsz ){
fts5WriteFlushLeaf(p, &writer);
pBuf = &writer.aWriter[0].buf;
pBuf = &writer.writer.buf;
if( (nTerm + 32) > pBuf->nSpace ){
fts5BufferGrow(&p->rc, pBuf, nTerm + 32 - pBuf->n);
if( p->rc ) break;
@ -4028,10 +3964,10 @@ static void fts5FlushOneHash(Fts5Index *p){
}else{
fts5PutU16(&pBuf->p[2], pBuf->n);
writer.bFirstTermInPage = 0;
if( writer.aWriter[0].pgno!=1 ){
if( writer.writer.pgno!=1 ){
int nPre = fts5PrefixCompress(nTerm, zPrev, nTerm, (const u8*)zTerm);
fts5WriteBtreeTerm(p, &writer, nPre+1, (const u8*)zTerm);
pBuf = &writer.aWriter[0].buf;
pBuf = &writer.writer.buf;
assert( nPre<nTerm );
}
nSuffix = nTerm;
@ -4043,7 +3979,7 @@ static void fts5FlushOneHash(Fts5Index *p){
** doclist-index is to be generated for this doclist, it will be
** associated with this page. */
assert( writer.nDlidx>0 && writer.aDlidx[0].buf.n==0 );
writer.aDlidx[0].pgno = writer.aWriter[0].pgno;
writer.aDlidx[0].pgno = writer.writer.pgno;
if( pgsz>=(pBuf->n + nDoclist + 1) ){
/* The entire doclist will fit on the current leaf. */
@ -4100,7 +4036,7 @@ static void fts5FlushOneHash(Fts5Index *p){
iPos += n;
if( pBuf->n>=pgsz ){
fts5WriteFlushLeaf(p, &writer);
pBuf = &writer.aWriter[0].buf;
pBuf = &writer.writer.buf;
}
if( iPos>=nCopy ) break;
}
@ -4134,7 +4070,6 @@ static void fts5FlushOneHash(Fts5Index *p){
fts5StructurePromote(p, 0, pStruct);
}
fts5IndexAutomerge(p, &pStruct, pgnoLast);
fts5IndexCrisismerge(p, &pStruct);
fts5StructureWrite(p, pStruct);
@ -4562,6 +4497,12 @@ int sqlite3Fts5IndexOpen(
rc = sqlite3Fts5CreateTable(
pConfig, "data", "id INTEGER PRIMARY KEY, block BLOB", 0, pzErr
);
if( rc==SQLITE_OK ){
rc = sqlite3Fts5CreateTable(pConfig, "idx",
"segid, term, pgno, dlidx, PRIMARY KEY(segid, term)",
1, pzErr
);
}
if( rc==SQLITE_OK ){
rc = sqlite3Fts5IndexReinit(p);
}
@ -4585,6 +4526,9 @@ int sqlite3Fts5IndexClose(Fts5Index *p){
assert( p->pReader==0 );
sqlite3_finalize(p->pWriter);
sqlite3_finalize(p->pDeleter);
sqlite3_finalize(p->pIdxWriter);
sqlite3_finalize(p->pIdxDeleter);
sqlite3_finalize(p->pIdxSelect);
sqlite3Fts5HashFree(p->pHash);
sqlite3Fts5BufferFree(&p->scratch);
sqlite3_free(p->zDataTbl);
@ -4934,92 +4878,6 @@ static u64 fts5IndexEntryCksum(
return ret;
}
/*
** Initialize the b-tree hierarchy iterator pIter to iterate through the
** hierarchy of segment pSeg.
**
** The iterator has (pSeg->nHeight-1) levels. For each level i, the node
** stored under FTS5_SEGMENT_ROWID(iSegid, i+1, 1) is loaded and a node
** iterator opened on it. Loading stops at the first level for which an
** error is recorded in p->rc.
**
** If the iterator has no levels at all, or if an error has occurred, the
** iterator is left at EOF with iLeaf set to the last leaf page of the
** segment. Otherwise the output variables nEmpty, iLeaf and bDlidx are
** populated from the node iterator at level 0.
*/
static void fts5BtreeIterInit(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg,     /* Segment to iterate through */
  Fts5BtreeIter *pIter            /* Iterator object to initialize */
){
  int nLevel = pSeg->nHeight-1;   /* Number of non-leaf levels */
  int nAlloc = sizeof(pIter->aLvl[0]) * nLevel;
  int iLvl = 0;

  memset(pIter, 0, sizeof(*pIter));
  if( nAlloc!=0 ){
    pIter->aLvl = (Fts5BtreeIterLevel*)fts5IdxMalloc(p, nAlloc);
  }
  if( p->rc==SQLITE_OK ){
    pIter->pSeg = pSeg;
    pIter->p = p;
    pIter->nLvl = nLevel;
  }

  /* Load the first node at each level of the hierarchy. pIter->nLvl is
  ** still zero here if the allocation above failed. */
  while( p->rc==SQLITE_OK && iLvl<pIter->nLvl ){
    Fts5BtreeIterLevel *pLvl = &pIter->aLvl[iLvl];
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, iLvl+1, 1);
    pLvl->pData = fts5DataRead(p, iRowid);
    if( pLvl->pData ){
      fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s);
    }
    iLvl++;
  }

  if( pIter->nLvl!=0 && p->rc==SQLITE_OK ){
    Fts5NodeIter *pFirst = &pIter->aLvl[0].s;
    pIter->nEmpty = pFirst->nEmpty;
    pIter->iLeaf = pFirst->iChild;
    pIter->bDlidx = pFirst->bDlidx;
  }else{
    pIter->bEof = 1;
    pIter->iLeaf = pSeg->pgnoLast;
  }
}
/*
** Advance b-tree hierarchy iterator pIter to its next entry.
**
** This function may only be called if the iterator is not already at EOF
** and the node iterator at level 0 is still valid (see the assert() below).
** If there are no further entries, or if an error is recorded in p->rc,
** pIter->bEof is set. The output variables nEmpty, bDlidx and iLeaf are
** always refreshed from the level 0 node iterator before returning.
*/
static void fts5BtreeIterNext(Fts5BtreeIter *pIter){
Fts5Index *p = pIter->p;
int i;
assert( pIter->bEof==0 && pIter->aLvl[0].s.aData );
/* Starting at level 0, advance the node iterator at each level until one
** is found that still has an entry to offer. That entry's term becomes the
** current term. Node iterators that are exhausted along the way are freed
** and their page data released. */
for(i=0; i<pIter->nLvl && p->rc==SQLITE_OK; i++){
Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
fts5NodeIterNext(&p->rc, &pLvl->s);
if( pLvl->s.aData ){
fts5BufferSet(&p->rc, &pIter->term, pLvl->s.term.n, pLvl->s.term.p);
break;
}else{
fts5NodeIterFree(&pLvl->s);
fts5DataRelease(pLvl->pData);
pLvl->pData = 0;
}
}
if( i==pIter->nLvl || p->rc ){
/* Every level was exhausted, or an error occurred. */
pIter->bEof = 1;
}else{
/* Level i still has an entry. Reload each level below it, from the top
** down, using the child page number of the level immediately above
** (pLvl[1] is aLvl[i+1]). If fts5DataRead() returns NULL the node
** iterator for that level is not re-initialized. */
int iSegid = pIter->pSeg->iSegid;
for(i--; i>=0; i--){
Fts5BtreeIterLevel *pLvl = &pIter->aLvl[i];
i64 iRowid = FTS5_SEGMENT_ROWID(iSegid, i+1, pLvl[1].s.iChild);
pLvl->pData = fts5DataRead(p, iRowid);
if( pLvl->pData ){
fts5NodeIterInit(pLvl->pData->p, pLvl->pData->n, &pLvl->s);
}
}
}
/* NOTE(review): on the EOF path the level 0 node iterator may already
** have been passed to fts5NodeIterFree() above; presumably the fields read
** here remain accessible after that call - confirm. */
pIter->nEmpty = pIter->aLvl[0].s.nEmpty;
pIter->bDlidx = pIter->aLvl[0].s.bDlidx;
pIter->iLeaf = pIter->aLvl[0].s.iChild;
}
/*
** Release all resources held by b-tree hierarchy iterator pIter: the node
** iterator and any loaded page data at each level, the aLvl[] array itself
** and the buffer holding the current term. The Fts5BtreeIter structure
** itself is not freed.
*/
static void fts5BtreeIterFree(Fts5BtreeIter *pIter){
  int iLvl = 0;
  while( iLvl<pIter->nLvl ){
    Fts5BtreeIterLevel *pLevel = &pIter->aLvl[iLvl++];
    fts5NodeIterFree(&pLevel->s);
    if( pLevel->pData!=0 ){
      fts5DataRelease(pLevel->pData);
    }
    pLevel->pData = 0;
  }
  sqlite3_free(pIter->aLvl);
  fts5BufferFree(&pIter->term);
}
#ifdef SQLITE_DEBUG
/*
** This function is purely an internal test. It does not contribute to
@ -5167,33 +5025,74 @@ static void fts5TestTerm(
# define fts5TestTerm(u,v,w,x,y,z)
#endif
/*
** Part of the integrity-check. Verify that:
**
**   1) Each leaf of segment pSeg with a page number between iFirst and
**      iLast (inclusive) exists and contains zero terms, and
**
**   2) Each leaf of segment pSeg with a page number between iNoRowid and
**      iLast (inclusive) exists and contains zero rowids.
**
** A leaf is judged to contain no terms if the 16-bit value stored at byte
** offset 2 of the page is zero, and no rowids if the 16-bit value stored at
** byte offset 0 is zero. If either check fails, p->rc is set to
** FTS5_CORRUPT. Checking stops as soon as p->rc is set to anything other
** than SQLITE_OK. This function is a no-op if p->rc is already set when it
** is called, or if iFirst is greater than iLast.
*/
static void fts5IndexIntegrityCheckEmpty(
  Fts5Index *p,                   /* FTS5 backend object */
  Fts5StructureSegment *pSeg,     /* Segment to check internal consistency */
  int iFirst,                     /* First leaf that must be termless */
  int iNoRowid,                   /* First leaf that must also be rowid-less */
  int iLast                       /* Last leaf to check */
){
  int iPgno = iFirst;

  /* Visit each leaf in the range in ascending page order. A leaf that
  ** fts5DataRead() cannot load (NULL return) is not inspected; any error
  ** left in p->rc terminates the loop. */
  while( p->rc==SQLITE_OK && iPgno<=iLast ){
    i64 iRowid = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iPgno);
    Fts5Data *pLeaf = fts5DataRead(p, iRowid);
    if( pLeaf ){
      /* Bytes 2..3: must be zero for a leaf that contains no terms */
      if( fts5GetU16(&pLeaf->p[2])!=0 ){
        p->rc = FTS5_CORRUPT;
      }
      /* Bytes 0..1: must be zero for a leaf that contains no rowids */
      if( iPgno>=iNoRowid && fts5GetU16(&pLeaf->p[0])!=0 ){
        p->rc = FTS5_CORRUPT;
      }
    }
    fts5DataRelease(pLeaf);
    iPgno++;
  }
}
static void fts5IndexIntegrityCheckSegment(
Fts5Index *p, /* FTS5 backend object */
Fts5StructureSegment *pSeg /* Segment to check internal consistency */
){
Fts5BtreeIter iter; /* Used to iterate through b-tree hierarchy */
Fts5Config *pConfig = p->pConfig;
sqlite3_stmt *pStmt = 0;
int rc2;
int iIdxPrevLeaf = pSeg->pgnoFirst-1;
int iDlidxPrevLeaf = pSeg->pgnoLast;
if( pSeg->pgnoFirst==0 ) return;
fts5IndexPrepareStmt(p, &pStmt, sqlite3_mprintf(
"SELECT segid, term, pgno, dlidx FROM '%q'.'%q_idx' WHERE segid=%d",
pConfig->zDb, pConfig->zName, pSeg->iSegid
));
/* Iterate through the b-tree hierarchy. */
for(fts5BtreeIterInit(p, pSeg, &iter);
p->rc==SQLITE_OK && iter.bEof==0;
fts5BtreeIterNext(&iter)
){
while( p->rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){
i64 iRow; /* Rowid for this leaf */
Fts5Data *pLeaf; /* Data for this leaf */
int iOff; /* Offset of first term on leaf */
int i; /* Used to iterate through empty leaves */
int nIdxTerm = sqlite3_column_bytes(pStmt, 1);
const char *zIdxTerm = (const char*)sqlite3_column_text(pStmt, 1);
int iIdxLeaf = sqlite3_column_int(pStmt, 2);
int bIdxDlidx = sqlite3_column_int(pStmt, 3);
/* If the leaf in question has already been trimmed from the segment,
** ignore this b-tree entry. Otherwise, load it into memory. */
if( iter.iLeaf<pSeg->pgnoFirst ) continue;
iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iter.iLeaf);
if( iIdxLeaf<pSeg->pgnoFirst ) continue;
iRow = FTS5_SEGMENT_ROWID(pSeg->iSegid, 0, iIdxLeaf);
pLeaf = fts5DataRead(p, iRow);
if( pLeaf==0 ) break;
/* Check that the leaf contains at least one term, and that it is equal
** to or larger than the split-key in iter.term. Also check that if there
** to or larger than the split-key in zIdxTerm. Also check that if there
** is also a rowid pointer within the leaf page header, it points to a
** location before the term. */
iOff = fts5GetU16(&pLeaf->p[2]);
@ -5209,8 +5108,8 @@ static void fts5IndexIntegrityCheckSegment(
p->rc = FTS5_CORRUPT;
}else{
iOff += fts5GetVarint32(&pLeaf->p[iOff], nTerm);
res = memcmp(&pLeaf->p[iOff], iter.term.p, MIN(nTerm, iter.term.n));
if( res==0 ) res = nTerm - iter.term.n;
res = memcmp(&pLeaf->p[iOff], zIdxTerm, MIN(nTerm, nIdxTerm));
if( res==0 ) res = nTerm - nIdxTerm;
if( res<0 ) p->rc = FTS5_CORRUPT;
}
}
@ -5220,23 +5119,20 @@ static void fts5IndexIntegrityCheckSegment(
/* Now check that the iter.nEmpty leaves following the current leaf
** (a) exist and (b) contain no terms. */
for(i=1; p->rc==SQLITE_OK && i<=iter.nEmpty; i++){
pLeaf = fts5DataRead(p, iRow+i);
if( pLeaf && 0!=fts5GetU16(&pLeaf->p[2]) ){
p->rc = FTS5_CORRUPT;
}
fts5DataRelease(pLeaf);
}
fts5IndexIntegrityCheckEmpty(
p, pSeg, iIdxPrevLeaf+1, iDlidxPrevLeaf+1, iIdxLeaf-1
);
if( p->rc ) break;
/* If there is a doclist-index, check that it looks right. */
if( iter.bDlidx ){
if( bIdxDlidx ){
Fts5DlidxIter *pDlidx = 0; /* For iterating through doclist index */
int iPrevLeaf = iter.iLeaf;
int iPrevLeaf = iIdxLeaf;
int iSegid = pSeg->iSegid;
int iPg;
i64 iKey;
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iter.iLeaf);
for(pDlidx=fts5DlidxIterInit(p, 0, iSegid, iIdxLeaf);
fts5DlidxIterEof(p, pDlidx)==0;
fts5DlidxIterNext(p, pDlidx)
){
@ -5269,26 +5165,26 @@ static void fts5IndexIntegrityCheckSegment(
}
}
for(iPg=iPrevLeaf+1; iPg<=(iter.iLeaf + iter.nEmpty); iPg++){
iKey = FTS5_SEGMENT_ROWID(iSegid, 0, iPg);
pLeaf = fts5DataRead(p, iKey);
if( pLeaf ){
if( fts5GetU16(&pLeaf->p[0])!=0 ) p->rc = FTS5_CORRUPT;
fts5DataRelease(pLeaf);
}
}
iDlidxPrevLeaf = iPg;
fts5DlidxIterFree(pDlidx);
fts5TestDlidxReverse(p, iSegid, iter.iLeaf);
fts5TestDlidxReverse(p, iSegid, iIdxLeaf);
}else{
iDlidxPrevLeaf = pSeg->pgnoLast;
/* TODO: Check there is no doclist index */
}
iIdxPrevLeaf = iIdxLeaf;
}
rc2 = sqlite3_finalize(pStmt);
if( p->rc==SQLITE_OK ) p->rc = rc2;
/* Page iter.iLeaf must now be the rightmost leaf-page in the segment */
#if 0
if( p->rc==SQLITE_OK && iter.iLeaf!=pSeg->pgnoLast ){
p->rc = FTS5_CORRUPT;
}
fts5BtreeIterFree(&iter);
#endif
}

View File

@ -180,8 +180,10 @@ static int fts5ExecPrintf(
int sqlite3Fts5DropAll(Fts5Config *pConfig){
int rc = fts5ExecPrintf(pConfig->db, 0,
"DROP TABLE IF EXISTS %Q.'%q_data';"
"DROP TABLE IF EXISTS %Q.'%q_idx';"
"DROP TABLE IF EXISTS %Q.'%q_config';",
pConfig->zDb, pConfig->zName,
pConfig->zDb, pConfig->zName,
pConfig->zDb, pConfig->zName
);
if( rc==SQLITE_OK && pConfig->bColumnsize ){
@ -218,6 +220,7 @@ int sqlite3Fts5StorageRename(Fts5Storage *pStorage, const char *zName){
int rc = sqlite3Fts5StorageSync(pStorage, 1);
fts5StorageRenameOne(pConfig, &rc, "data", zName);
fts5StorageRenameOne(pConfig, &rc, "idx", zName);
fts5StorageRenameOne(pConfig, &rc, "config", zName);
if( pConfig->bColumnsize ){
fts5StorageRenameOne(pConfig, &rc, "docsize", zName);

View File

@ -27,6 +27,7 @@ do_execsql_test 1.0 {
} {
t1 {CREATE VIRTUAL TABLE t1 USING fts5(a, b, c)}
t1_data {CREATE TABLE 't1_data'(id INTEGER PRIMARY KEY, block BLOB)}
t1_idx {CREATE TABLE 't1_idx'(segid, term, pgno, dlidx, PRIMARY KEY(segid, term)) WITHOUT ROWID}
t1_content {CREATE TABLE 't1_content'(id INTEGER PRIMARY KEY, c0, c1, c2)}
t1_docsize {CREATE TABLE 't1_docsize'(id INTEGER PRIMARY KEY, sz BLOB)}
t1_config {CREATE TABLE 't1_config'(k PRIMARY KEY, v) WITHOUT ROWID}
@ -47,9 +48,10 @@ do_execsql_test 2.0 {
do_execsql_test 2.1 {
INSERT INTO t1 VALUES('a b c', 'd e f');
}
do_test 2.2 {
execsql { SELECT fts5_decode(id, block) FROM t1_data WHERE id==10 }
} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=1 leaves=1..1}}}/}
} {/{\(structure\) {lvl=0 nMerge=0 nSeg=1 {id=[0123456789]* h=0 leaves=1..1}}}/}
foreach w {a b c d e f} {
do_execsql_test 2.3.$w.asc {

View File

@ -247,7 +247,7 @@ reset_db
do_execsql_test 6.1 {
CREATE VIRTUAL TABLE xx USING fts5(x, y, content="");
SELECT name FROM sqlite_master;
} {xx xx_data xx_docsize xx_config}
} {xx xx_data xx_idx xx_docsize xx_config}
do_execsql_test 6.2 {
DROP TABLE xx;
SELECT name FROM sqlite_master;

View File

@ -1,5 +1,5 @@
C Fix\ssome\sharmless\scompiler\swarnings.
D 2015-07-15T18:35:54.200
C Use\sa\sWITHOUT\sROWID\stable\sto\sindex\sfts5\sbtree\sleaves.\sThis\sis\sfaster\sto\squery\sand\sonly\sslightly\slarger\sthan\sstoring\sbtree\snodes\swithin\san\sintkey\stable.
D 2015-07-15T19:46:02.242
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 6e8af213d49e6325bf283ebed7662254f8e15bda
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -109,12 +109,12 @@ F ext/fts5/fts5.h 81d1a92fc2b4bd477af7e4e0b38b456f3e199fba
F ext/fts5/fts5Int.h 8d9bce1847a10df2e4ed9492ea4f3868276748fb
F ext/fts5/fts5_aux.c 044cb176a815f4388308738437f6e130aa384fb0
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
F ext/fts5/fts5_config.c b2456e9625bca41c51d54c363e369c6356895c90
F ext/fts5/fts5_config.c fdfa63ae8e527ecfaa50f94063c610429cc887cf
F ext/fts5/fts5_expr.c d2e148345639c5a5583e0daa39a639bf298ae6a7
F ext/fts5/fts5_hash.c 219f4edd72e5cf95b19c33f1058809a18fad5229
F ext/fts5/fts5_index.c cfd41d49591e4e4ce2a5f84de35512f59fbb360d
F ext/fts5/fts5_index.c 7fe8e8afdb872b55726263b2a82288ebabda969c
F ext/fts5/fts5_main.c 8f279999deb204b0c7760464f60f88666046398b
F ext/fts5/fts5_storage.c 1c35a38a564ee9cadcbd7ae0b13a806bdda722bd
F ext/fts5/fts5_storage.c 877399c557f273a725b5e4fc26f07e67ca90570a
F ext/fts5/fts5_tcl.c 85eb4e0d0fefa9420b78151496ad4599a1783e20
F ext/fts5/fts5_tokenize.c 30f97a8c74683797b4cd233790444fbefb3b0708
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
@ -123,7 +123,7 @@ F ext/fts5/fts5_vocab.c 4e268a3fcbc099e50e335a1135be985a41ff6f7f
F ext/fts5/fts5parse.y 833db1101b78c0c47686ab1b84918e38c36e9452
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl e0b4a846a7670f6232a644ece69ef25a5c19c0e8
F ext/fts5/test/fts5aa.test 4e896b9154764fed48179a87ba0bdf3650d7f49d
F ext/fts5/test/fts5aa.test 8dac4216e5ad8fd240cff078d1893520bb1f5fb2
F ext/fts5/test/fts5ab.test 6fe3a56731d15978afbb74ae51b355fc9310f2ad
F ext/fts5/test/fts5ac.test 9737992d08c56bfd4803e933744d2d764e23795c
F ext/fts5/test/fts5ad.test b2edee8b7de0c21d2c88f8a18c195034aad6952d
@ -142,7 +142,7 @@ F ext/fts5/test/fts5auxdata.test 141a7cbffcceb1bd2799b4b29c183ff8780d586e
F ext/fts5/test/fts5bigpl.test 04ee0d7eebbebf17c31f5a0b5c5f9494eac3a0cb
F ext/fts5/test/fts5columnsize.test 97dc6bd66c91009d00407aa078dd5e9e8eb22f99
F ext/fts5/test/fts5config.test ad2ff42ddc856aed2d05bf89dc1c578c8a39ea3b
F ext/fts5/test/fts5content.test d0d90a45f0bcf07d75d474500d81f941b45e2021
F ext/fts5/test/fts5content.test 9a952c95518a14182dc3b59e3c8fa71cda82a4e1
F ext/fts5/test/fts5corrupt.test 928c9c91d40690d301f943a7ed0ffc19e0d0e7b6
F ext/fts5/test/fts5corrupt2.test 1a830ccd6dbe1b601c7e3f5bbc1cf77bd8c8803b
F ext/fts5/test/fts5corrupt3.test 1ccf575f5126e79f9fec7979fd02a1f40a076be3
@ -1365,7 +1365,10 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P b522c95ddcd7046dca756f4d1a1e90c34dbcab64
R 5ae06ee700ba79856dd8d83789b8e902
U drh
Z c7acc62193fe6f94b78724b3b176c9a0
P 110cd84f5e842c4dcd9b9398cea211e25f36b3aa
R 6be01aa6bafcfd1604a2d6d0b1df9b9e
T *branch * fts5-btree-index
T *sym-fts5-btree-index *
T -sym-trunk *
U dan
Z 0f1494b26034248995977524e0d0424a

View File

@ -1 +1 @@
110cd84f5e842c4dcd9b9398cea211e25f36b3aa
862418e3506d4b7cca9c44d58c2eb9dc915d75c9