From 7c3ca3611c5601212e2171da2ac700d612913712 Mon Sep 17 00:00:00 2001 From: dan Date: Thu, 10 Aug 2017 20:36:56 +0000 Subject: [PATCH] Add the "instance" type to the fts5vocab virtual table module. For direct access to the contents of the fts5 term index. FossilOrigin-Name: 34a7bd7121a478e14982d59be95ad891fab8050ad5e006638f826c57c392b93e --- ext/fts5/fts5_vocab.c | 165 +++++++++++++++++++++++++++++++++++------- manifest | 14 ++-- manifest.uuid | 2 +- 3 files changed, 147 insertions(+), 34 deletions(-) diff --git a/ext/fts5/fts5_vocab.c b/ext/fts5/fts5_vocab.c index 82c7dc9056..e48a54c9ab 100644 --- a/ext/fts5/fts5_vocab.c +++ b/ext/fts5/fts5_vocab.c @@ -29,6 +29,11 @@ ** the number of fts5 rows that contain at least one instance of term ** $term. Field $cnt is set to the total number of instances of term ** $term in the database. +** +** instance: +** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY()); +** +** One row for each term instance in the database. */ @@ -44,7 +49,7 @@ struct Fts5VocabTable { char *zFts5Db; /* Db containing fts5 table */ sqlite3 *db; /* Database handle */ Fts5Global *pGlobal; /* FTS5 global object for this database */ - int eType; /* FTS5_VOCAB_COL or ROW */ + int eType; /* FTS5_VOCAB_COL, ROW or INSTANCE */ }; struct Fts5VocabCursor { @@ -64,16 +69,22 @@ struct Fts5VocabCursor { i64 *aCnt; i64 *aDoc; - /* Output values used by 'row' and 'col' tables */ + /* Output values used by all tables. */ i64 rowid; /* This table's current rowid value */ Fts5Buffer term; /* Current value of 'term' column */ + + /* Output values Used by 'instance' tables only */ + i64 iInstPos; + int iInstOff; }; -#define FTS5_VOCAB_COL 0 -#define FTS5_VOCAB_ROW 1 +#define FTS5_VOCAB_COL 0 +#define FTS5_VOCAB_ROW 1 +#define FTS5_VOCAB_INSTANCE 2 #define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt" #define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt" +#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset" /* ** Bits for the mask used as the idxNum value by xBestIndex/xFilter. @@ -101,6 +112,9 @@ static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){ if( sqlite3_stricmp(zCopy, "row")==0 ){ *peType = FTS5_VOCAB_ROW; }else + if( sqlite3_stricmp(zCopy, "instance")==0 ){ + *peType = FTS5_VOCAB_INSTANCE; + }else { *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy); rc = SQLITE_ERROR; @@ -161,7 +175,8 @@ static int fts5VocabInitVtab( ){ const char *azSchema[] = { "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")", - "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")" + "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")", + "CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")" }; Fts5VocabTable *pRet = 0; @@ -235,6 +250,15 @@ static int fts5VocabCreateMethod( /* ** Implementation of the xBestIndex method. +** +** Only constraints of the form: +** +** term <= ? +** term == ? +** term >= ? +** +** are interpreted. Less-than and less-than-or-equal are treated +** identically, as are greater-than and greater-than-or-equal. */ static int fts5VocabBestIndexMethod( sqlite3_vtab *pUnused, @@ -378,6 +402,54 @@ static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){ return SQLITE_OK; } +static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){ + int rc = SQLITE_OK; + + if( sqlite3Fts5IterEof(pCsr->pIter) ){ + pCsr->bEof = 1; + }else{ + const char *zTerm; + int nTerm; + zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); + if( pCsr->nLeTerm>=0 ){ + int nCmp = MIN(nTerm, pCsr->nLeTerm); + int bCmp = memcmp(pCsr->zLeTerm, zTerm, nCmp); + if( bCmp<0 || (bCmp==0 && pCsr->nLeTermbEof = 1; + } + } + + sqlite3Fts5BufferSet(&rc, &pCsr->term, nTerm, (const u8*)zTerm); + } + return rc; +} + +static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){ + int eDetail = pCsr->pConfig->eDetail; + int rc = SQLITE_OK; + Fts5IndexIter *pIter = pCsr->pIter; + i64 *pp = &pCsr->iInstPos; + int *po = &pCsr->iInstOff; + + while( eDetail==FTS5_DETAIL_NONE + || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp) + ){ + pCsr->iInstPos = 0; + pCsr->iInstOff = 0; + + rc = sqlite3Fts5IterNextScan(pCsr->pIter); + if( rc==SQLITE_OK ){ + rc = fts5VocabInstanceNewTerm(pCsr); + if( eDetail==FTS5_DETAIL_NONE ) break; + } + if( rc ){ + pCsr->bEof = 1; + break; + } + } + + return rc; +} /* ** Advance the cursor to the next row in the table. @@ -390,13 +462,17 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ pCsr->rowid++; + if( pTab->eType==FTS5_VOCAB_INSTANCE ){ + return fts5VocabInstanceNext(pCsr); + } + if( pTab->eType==FTS5_VOCAB_COL ){ for(pCsr->iCol++; pCsr->iColiCol++){ if( pCsr->aDoc[pCsr->iCol] ) break; } } - if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){ + if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){ if( sqlite3Fts5IterEof(pCsr->pIter) ){ pCsr->bEof = 1; }else{ @@ -420,22 +496,26 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); while( rc==SQLITE_OK ){ + int eDetail = pCsr->pConfig->eDetail; const u8 *pPos; int nPos; /* Position list */ i64 iPos = 0; /* 64-bit position read from poslist */ int iOff = 0; /* Current offset within position list */ pPos = pCsr->pIter->pData; nPos = pCsr->pIter->nData; - switch( pCsr->pConfig->eDetail ){ - case FTS5_DETAIL_FULL: - pPos = pCsr->pIter->pData; - nPos = pCsr->pIter->nData; - if( pTab->eType==FTS5_VOCAB_ROW ){ + + switch( pTab->eType ){ + case FTS5_VOCAB_ROW: + if( eDetail==FTS5_DETAIL_FULL ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ pCsr->aCnt[0]++; } - pCsr->aDoc[0]++; - }else{ + } + pCsr->aDoc[0]++; + break; + + case FTS5_VOCAB_COL: + if( eDetail==FTS5_DETAIL_FULL ){ int iCol = -1; while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ int ii = FTS5_POS2COLUMN(iPos); @@ -449,13 +529,7 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ iCol = ii; } } - } - break; - - case FTS5_DETAIL_COLUMNS: - if( pTab->eType==FTS5_VOCAB_ROW ){ - pCsr->aDoc[0]++; - }else{ + }else if( eDetail==FTS5_DETAIL_COLUMNS ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ assert_nc( iPos>=0 && iPos=nCol ){ @@ -464,18 +538,21 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){ } pCsr->aDoc[iPos]++; } + }else{ + assert( eDetail==FTS5_DETAIL_NONE ); + pCsr->aDoc[0]++; } break; - default: - assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE ); - pCsr->aDoc[0]++; + default: + assert( pTab->eType==FTS5_VOCAB_INSTANCE ); break; } if( rc==SQLITE_OK ){ rc = sqlite3Fts5IterNextScan(pCsr->pIter); } + if( pTab->eType==FTS5_VOCAB_INSTANCE ) break; if( rc==SQLITE_OK ){ zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); @@ -505,7 +582,9 @@ static int fts5VocabFilterMethod( int nUnused, /* Number of elements in apVal */ sqlite3_value **apVal /* Arguments for the indexing scheme */ ){ + Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab; Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; + int eType = pTab->eType; int rc = SQLITE_OK; int iVal = 0; @@ -545,11 +624,16 @@ static int fts5VocabFilterMethod( } } - if( rc==SQLITE_OK ){ rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); } - if( rc==SQLITE_OK ){ + if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){ + rc = fts5VocabInstanceNewTerm(pCsr); + } + if( rc==SQLITE_OK + && !pCsr->bEof + && (eType!=FTS5_VOCAB_INSTANCE || pCsr->pConfig->eDetail!=FTS5_DETAIL_NONE) + ){ rc = fts5VocabNextMethod(pCursor); } @@ -591,13 +675,42 @@ static int fts5VocabColumnMethod( }else{ iVal = pCsr->aCnt[pCsr->iCol]; } - }else{ + }else if( eType==FTS5_VOCAB_ROW ){ assert( iCol==1 || iCol==2 ); if( iCol==1 ){ iVal = pCsr->aDoc[0]; }else{ iVal = pCsr->aCnt[0]; } + }else{ + int eDetail = pCsr->pConfig->eDetail; + assert( eType==FTS5_VOCAB_INSTANCE ); + switch( iCol ){ + case 1: + sqlite3_result_int64(pCtx, pCsr->pIter->iRowid); + break; + case 2: { + int ii = -1; + if( eDetail==FTS5_DETAIL_FULL ){ + ii = FTS5_POS2COLUMN(pCsr->iInstPos); + }else if( eDetail==FTS5_DETAIL_COLUMNS ){ + ii = pCsr->iInstPos; + } + if( ii>=0 && iipConfig->nCol ){ + const char *z = pCsr->pConfig->azCol[ii]; + sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC); + } + break; + } + default: { + assert( iCol==3 ); + if( eDetail==FTS5_DETAIL_FULL ){ + int ii = FTS5_POS2OFFSET(pCsr->iInstPos); + sqlite3_result_int(pCtx, ii); + } + break; + } + } } if( iVal>0 ) sqlite3_result_int64(pCtx, iVal); diff --git a/manifest b/manifest index 2717e23e3d..17ea787547 100644 --- a/manifest +++ b/manifest @@ -1,5 +1,5 @@ -C Add\sa\sfirst\sdraft\sof\sthe\s"vtablog"\sextensions\sthat\simplements\sa\sgeneric\nvirtual\stable\suseful\sfor\sexperimentation\susing\sthe\scommand-line\sshell. -D 2017-08-10T03:27:27.239 +C Add\sthe\s"instance"\stype\sto\sthe\sfts5vocab\svirtual\stable\smodule.\sFor\sdirect\naccess\sto\sthe\scontents\sof\sthe\sfts5\sterm\sindex. +D 2017-08-10T20:36:56.419 F Makefile.in d9873c9925917cca9990ee24be17eb9613a668012c85a343aef7e5536ae266e8 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.msc 02b469e9dcd5b7ee63fc1fb05babc174260ee4cfa4e0ef2e48c3c6801567a016 @@ -114,7 +114,7 @@ F ext/fts5/fts5_test_tok.c ffd657dd67e7fcdb31bf63fb60b6d867299a581d0f46e97086aba F ext/fts5/fts5_tokenize.c 2ce7b44183538ec46b7907726262ee43ffdd39a8 F ext/fts5/fts5_unicode2.c b450b209b157d598f7b9df9f837afb75a14c24bf F ext/fts5/fts5_varint.c a5aceacda04dafcbae725413d7a16818ecd65738 -F ext/fts5/fts5_vocab.c e44fefa7f0c1db252998af071daf06a7147e17e7 +F ext/fts5/fts5_vocab.c 90783d59cb8ee29ae08ac7f7e1f9c04cc4fb3ffc46d34fedba96d145636dd39d F ext/fts5/fts5parse.y a070b538e08ae9e2177d15c337ed2a3464408f0f886e746307098f746efd94ca F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/test/fts5_common.tcl b01c584144b5064f30e6c648145a2dd6bc440841 @@ -1645,7 +1645,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93 F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 -P dcdf091388251292ff9939bdff920708320bc64dacfe0fa1878c5ffd11b679c9 -R 089d3b3d49f7f300de0a7e207b239b0f -U drh -Z e2065e8cae31fe4aaa6c975ba17a09f2 +P e49279e65169a939b6058a0960dc1fe09ce4ee2d78992a1969773cbc7ce1043b +R 51c4219f1ac2413928699169835fef62 +U dan +Z c67de475a6759cd5bb3ba77ac9b27176 diff --git a/manifest.uuid b/manifest.uuid index 2a55521c23..6de194638d 100644 --- a/manifest.uuid +++ b/manifest.uuid @@ -1 +1 @@ -e49279e65169a939b6058a0960dc1fe09ce4ee2d78992a1969773cbc7ce1043b \ No newline at end of file +34a7bd7121a478e14982d59be95ad891fab8050ad5e006638f826c57c392b93e \ No newline at end of file