Add the "instance" type to the fts5vocab virtual table module, providing direct access to the contents of the fts5 term index.

FossilOrigin-Name: 34a7bd7121a478e14982d59be95ad891fab8050ad5e006638f826c57c392b93e
This commit is contained in:
dan 2017-08-10 20:36:56 +00:00
parent 6fa9375c01
commit 7c3ca3611c
3 changed files with 147 additions and 34 deletions

View File

@ -29,6 +29,11 @@
** the number of fts5 rows that contain at least one instance of term ** the number of fts5 rows that contain at least one instance of term
** $term. Field $cnt is set to the total number of instances of term ** $term. Field $cnt is set to the total number of instances of term
** $term in the database. ** $term in the database.
**
** instance:
** CREATE TABLE vocab(term, doc, col, offset, PRIMARY KEY(<all-fields>));
**
** One row for each term instance in the database.
*/ */
@ -44,7 +49,7 @@ struct Fts5VocabTable {
char *zFts5Db; /* Db containing fts5 table */ char *zFts5Db; /* Db containing fts5 table */
sqlite3 *db; /* Database handle */ sqlite3 *db; /* Database handle */
Fts5Global *pGlobal; /* FTS5 global object for this database */ Fts5Global *pGlobal; /* FTS5 global object for this database */
int eType; /* FTS5_VOCAB_COL or ROW */ int eType; /* FTS5_VOCAB_COL, ROW or INSTANCE */
}; };
struct Fts5VocabCursor { struct Fts5VocabCursor {
@ -64,16 +69,22 @@ struct Fts5VocabCursor {
i64 *aCnt; i64 *aCnt;
i64 *aDoc; i64 *aDoc;
/* Output values used by 'row' and 'col' tables */ /* Output values used by all tables. */
i64 rowid; /* This table's current rowid value */ i64 rowid; /* This table's current rowid value */
Fts5Buffer term; /* Current value of 'term' column */ Fts5Buffer term; /* Current value of 'term' column */
/* Output values used by 'instance' tables only */
i64 iInstPos;
int iInstOff;
}; };
#define FTS5_VOCAB_COL 0 #define FTS5_VOCAB_COL 0
#define FTS5_VOCAB_ROW 1 #define FTS5_VOCAB_ROW 1
#define FTS5_VOCAB_INSTANCE 2
#define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt" #define FTS5_VOCAB_COL_SCHEMA "term, col, doc, cnt"
#define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt" #define FTS5_VOCAB_ROW_SCHEMA "term, doc, cnt"
#define FTS5_VOCAB_INST_SCHEMA "term, doc, col, offset"
/* /*
** Bits for the mask used as the idxNum value by xBestIndex/xFilter. ** Bits for the mask used as the idxNum value by xBestIndex/xFilter.
@ -101,6 +112,9 @@ static int fts5VocabTableType(const char *zType, char **pzErr, int *peType){
if( sqlite3_stricmp(zCopy, "row")==0 ){ if( sqlite3_stricmp(zCopy, "row")==0 ){
*peType = FTS5_VOCAB_ROW; *peType = FTS5_VOCAB_ROW;
}else }else
if( sqlite3_stricmp(zCopy, "instance")==0 ){
*peType = FTS5_VOCAB_INSTANCE;
}else
{ {
*pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy); *pzErr = sqlite3_mprintf("fts5vocab: unknown table type: %Q", zCopy);
rc = SQLITE_ERROR; rc = SQLITE_ERROR;
@ -161,7 +175,8 @@ static int fts5VocabInitVtab(
){ ){
const char *azSchema[] = { const char *azSchema[] = {
"CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")", "CREATE TABlE vocab(" FTS5_VOCAB_COL_SCHEMA ")",
"CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")" "CREATE TABlE vocab(" FTS5_VOCAB_ROW_SCHEMA ")",
"CREATE TABlE vocab(" FTS5_VOCAB_INST_SCHEMA ")"
}; };
Fts5VocabTable *pRet = 0; Fts5VocabTable *pRet = 0;
@ -235,6 +250,15 @@ static int fts5VocabCreateMethod(
/* /*
** Implementation of the xBestIndex method. ** Implementation of the xBestIndex method.
**
** Only constraints of the form:
**
** term <= ?
** term == ?
** term >= ?
**
** are interpreted. Less-than and less-than-or-equal are treated
** identically, as are greater-than and greater-than-or-equal.
*/ */
static int fts5VocabBestIndexMethod( static int fts5VocabBestIndexMethod(
sqlite3_vtab *pUnused, sqlite3_vtab *pUnused,
@ -378,6 +402,54 @@ static int fts5VocabCloseMethod(sqlite3_vtab_cursor *pCursor){
return SQLITE_OK; return SQLITE_OK;
} }
/*
** Refresh the cursor state after pCsr->pIter has been moved to a new
** entry.
**
** If the iterator is at EOF, the cursor is flagged as being at EOF and
** nothing else is done. Otherwise the iterator's current term is read and,
** when pCsr->nLeTerm is non-negative, compared against the
** zLeTerm/nLeTerm blob. If the new term sorts after that blob the cursor
** is flagged as being at EOF (presumably zLeTerm is the upper bound taken
** from a "term <= ?" constraint - confirm against xFilter). In either
** non-EOF case the term is copied into pCsr->term.
**
** Returns SQLITE_OK, or whatever error code sqlite3Fts5BufferSet() stores
** in its rc argument.
*/
static int fts5VocabInstanceNewTerm(Fts5VocabCursor *pCsr){
  int rc = SQLITE_OK;
  const char *zNew;               /* Term the iterator now points at */
  int nNew;                       /* Size of zNew in bytes */

  if( sqlite3Fts5IterEof(pCsr->pIter) ){
    pCsr->bEof = 1;
    return rc;
  }

  zNew = sqlite3Fts5IterTerm(pCsr->pIter, &nNew);
  if( pCsr->nLeTerm>=0 ){
    /* Compare the common prefix first; on a tie the longer blob is larger. */
    int nPrefix = MIN(nNew, pCsr->nLeTerm);
    int res = memcmp(pCsr->zLeTerm, zNew, nPrefix);
    if( res<0 ){
      pCsr->bEof = 1;
    }else if( res==0 && pCsr->nLeTerm<nNew ){
      pCsr->bEof = 1;
    }
  }

  sqlite3Fts5BufferSet(&rc, &pCsr->term, nNew, (const u8*)zNew);
  return rc;
}
/*
** Advance a cursor on an 'instance' type table to its next row.
**
** For detail modes other than FTS5_DETAIL_NONE, the next position is read
** from the iterator's current position list into pCsr->iInstPos, with
** pCsr->iInstOff tracking the read offset. sqlite3Fts5PoslistNext64() is
** treated as returning non-zero once the list is exhausted; in that case
** the position state is reset, the iterator is stepped to its next entry
** and the loop tries again with the new entry's position list.
**
** For FTS5_DETAIL_NONE there are no positions to read, so each call steps
** the iterator exactly once.
**
** The loop stops as soon as fts5VocabInstanceNewTerm() flags the cursor as
** being at EOF. Without that check the loop would go on to read the
** position data of an iterator that is already at EOF, or keep scanning
** index entries that lie beyond the cursor's upper term bound.
**
** Returns SQLITE_OK on success. If an error occurs while positions are
** being scanned, pCsr->bEof is set and the error code is returned.
*/
static int fts5VocabInstanceNext(Fts5VocabCursor *pCsr){
  int eDetail = pCsr->pConfig->eDetail;
  int rc = SQLITE_OK;
  Fts5IndexIter *pIter = pCsr->pIter;
  i64 *pp = &pCsr->iInstPos;
  int *po = &pCsr->iInstOff;

  while( eDetail==FTS5_DETAIL_NONE
      || sqlite3Fts5PoslistNext64(pIter->pData, pIter->nData, po, pp)
  ){
    /* Current entry exhausted: restart position tracking for the next one. */
    pCsr->iInstPos = 0;
    pCsr->iInstOff = 0;

    rc = sqlite3Fts5IterNextScan(pCsr->pIter);
    if( rc==SQLITE_OK ){
      rc = fts5VocabInstanceNewTerm(pCsr);
      /* Stop once the cursor is at EOF; the iterator's position data must
      ** not be consulted after that point. */
      if( pCsr->bEof || eDetail==FTS5_DETAIL_NONE ) break;
    }
    if( rc ){
      pCsr->bEof = 1;
      break;
    }
  }

  return rc;
}
/* /*
** Advance the cursor to the next row in the table. ** Advance the cursor to the next row in the table.
@ -390,13 +462,17 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
pCsr->rowid++; pCsr->rowid++;
if( pTab->eType==FTS5_VOCAB_INSTANCE ){
return fts5VocabInstanceNext(pCsr);
}
if( pTab->eType==FTS5_VOCAB_COL ){ if( pTab->eType==FTS5_VOCAB_COL ){
for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){ for(pCsr->iCol++; pCsr->iCol<nCol; pCsr->iCol++){
if( pCsr->aDoc[pCsr->iCol] ) break; if( pCsr->aDoc[pCsr->iCol] ) break;
} }
} }
if( pTab->eType==FTS5_VOCAB_ROW || pCsr->iCol>=nCol ){ if( pTab->eType!=FTS5_VOCAB_COL || pCsr->iCol>=nCol ){
if( sqlite3Fts5IterEof(pCsr->pIter) ){ if( sqlite3Fts5IterEof(pCsr->pIter) ){
pCsr->bEof = 1; pCsr->bEof = 1;
}else{ }else{
@ -420,22 +496,26 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW ); assert( pTab->eType==FTS5_VOCAB_COL || pTab->eType==FTS5_VOCAB_ROW );
while( rc==SQLITE_OK ){ while( rc==SQLITE_OK ){
int eDetail = pCsr->pConfig->eDetail;
const u8 *pPos; int nPos; /* Position list */ const u8 *pPos; int nPos; /* Position list */
i64 iPos = 0; /* 64-bit position read from poslist */ i64 iPos = 0; /* 64-bit position read from poslist */
int iOff = 0; /* Current offset within position list */ int iOff = 0; /* Current offset within position list */
pPos = pCsr->pIter->pData; pPos = pCsr->pIter->pData;
nPos = pCsr->pIter->nData; nPos = pCsr->pIter->nData;
switch( pCsr->pConfig->eDetail ){
case FTS5_DETAIL_FULL: switch( pTab->eType ){
pPos = pCsr->pIter->pData; case FTS5_VOCAB_ROW:
nPos = pCsr->pIter->nData; if( eDetail==FTS5_DETAIL_FULL ){
if( pTab->eType==FTS5_VOCAB_ROW ){
while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
pCsr->aCnt[0]++; pCsr->aCnt[0]++;
} }
pCsr->aDoc[0]++; }
}else{ pCsr->aDoc[0]++;
break;
case FTS5_VOCAB_COL:
if( eDetail==FTS5_DETAIL_FULL ){
int iCol = -1; int iCol = -1;
while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff, &iPos) ){
int ii = FTS5_POS2COLUMN(iPos); int ii = FTS5_POS2COLUMN(iPos);
@ -449,13 +529,7 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
iCol = ii; iCol = ii;
} }
} }
} }else if( eDetail==FTS5_DETAIL_COLUMNS ){
break;
case FTS5_DETAIL_COLUMNS:
if( pTab->eType==FTS5_VOCAB_ROW ){
pCsr->aDoc[0]++;
}else{
while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){ while( 0==sqlite3Fts5PoslistNext64(pPos, nPos, &iOff,&iPos) ){
assert_nc( iPos>=0 && iPos<nCol ); assert_nc( iPos>=0 && iPos<nCol );
if( iPos>=nCol ){ if( iPos>=nCol ){
@ -464,18 +538,21 @@ static int fts5VocabNextMethod(sqlite3_vtab_cursor *pCursor){
} }
pCsr->aDoc[iPos]++; pCsr->aDoc[iPos]++;
} }
}else{
assert( eDetail==FTS5_DETAIL_NONE );
pCsr->aDoc[0]++;
} }
break; break;
default: default:
assert( pCsr->pConfig->eDetail==FTS5_DETAIL_NONE ); assert( pTab->eType==FTS5_VOCAB_INSTANCE );
pCsr->aDoc[0]++;
break; break;
} }
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
rc = sqlite3Fts5IterNextScan(pCsr->pIter); rc = sqlite3Fts5IterNextScan(pCsr->pIter);
} }
if( pTab->eType==FTS5_VOCAB_INSTANCE ) break;
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm); zTerm = sqlite3Fts5IterTerm(pCsr->pIter, &nTerm);
@ -505,7 +582,9 @@ static int fts5VocabFilterMethod(
int nUnused, /* Number of elements in apVal */ int nUnused, /* Number of elements in apVal */
sqlite3_value **apVal /* Arguments for the indexing scheme */ sqlite3_value **apVal /* Arguments for the indexing scheme */
){ ){
Fts5VocabTable *pTab = (Fts5VocabTable*)pCursor->pVtab;
Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor; Fts5VocabCursor *pCsr = (Fts5VocabCursor*)pCursor;
int eType = pTab->eType;
int rc = SQLITE_OK; int rc = SQLITE_OK;
int iVal = 0; int iVal = 0;
@ -545,11 +624,16 @@ static int fts5VocabFilterMethod(
} }
} }
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK ){
rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter); rc = sqlite3Fts5IndexQuery(pCsr->pIndex, zTerm, nTerm, f, 0, &pCsr->pIter);
} }
if( rc==SQLITE_OK ){ if( rc==SQLITE_OK && eType==FTS5_VOCAB_INSTANCE ){
rc = fts5VocabInstanceNewTerm(pCsr);
}
if( rc==SQLITE_OK
&& !pCsr->bEof
&& (eType!=FTS5_VOCAB_INSTANCE || pCsr->pConfig->eDetail!=FTS5_DETAIL_NONE)
){
rc = fts5VocabNextMethod(pCursor); rc = fts5VocabNextMethod(pCursor);
} }
@ -591,13 +675,42 @@ static int fts5VocabColumnMethod(
}else{ }else{
iVal = pCsr->aCnt[pCsr->iCol]; iVal = pCsr->aCnt[pCsr->iCol];
} }
}else{ }else if( eType==FTS5_VOCAB_ROW ){
assert( iCol==1 || iCol==2 ); assert( iCol==1 || iCol==2 );
if( iCol==1 ){ if( iCol==1 ){
iVal = pCsr->aDoc[0]; iVal = pCsr->aDoc[0];
}else{ }else{
iVal = pCsr->aCnt[0]; iVal = pCsr->aCnt[0];
} }
}else{
int eDetail = pCsr->pConfig->eDetail;
assert( eType==FTS5_VOCAB_INSTANCE );
switch( iCol ){
case 1:
sqlite3_result_int64(pCtx, pCsr->pIter->iRowid);
break;
case 2: {
int ii = -1;
if( eDetail==FTS5_DETAIL_FULL ){
ii = FTS5_POS2COLUMN(pCsr->iInstPos);
}else if( eDetail==FTS5_DETAIL_COLUMNS ){
ii = pCsr->iInstPos;
}
if( ii>=0 && ii<pCsr->pConfig->nCol ){
const char *z = pCsr->pConfig->azCol[ii];
sqlite3_result_text(pCtx, z, -1, SQLITE_STATIC);
}
break;
}
default: {
assert( iCol==3 );
if( eDetail==FTS5_DETAIL_FULL ){
int ii = FTS5_POS2OFFSET(pCsr->iInstPos);
sqlite3_result_int(pCtx, ii);
}
break;
}
}
} }
if( iVal>0 ) sqlite3_result_int64(pCtx, iVal); if( iVal>0 ) sqlite3_result_int64(pCtx, iVal);

View File

@ -1,5 +1,5 @@
C Add\sa\sfirst\sdraft\sof\sthe\s"vtablog"\sextensions\sthat\simplements\sa\sgeneric\nvirtual\stable\suseful\sfor\sexperimentation\susing\sthe\scommand-line\sshell. C Add\sthe\s"instance"\stype\sto\sthe\sfts5vocab\svirtual\stable\smodule.\sFor\sdirect\naccess\sto\sthe\scontents\sof\sthe\sfts5\sterm\sindex.
D 2017-08-10T03:27:27.239 D 2017-08-10T20:36:56.419
F Makefile.in d9873c9925917cca9990ee24be17eb9613a668012c85a343aef7e5536ae266e8 F Makefile.in d9873c9925917cca9990ee24be17eb9613a668012c85a343aef7e5536ae266e8
F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434 F Makefile.linux-gcc 7bc79876b875010e8c8f9502eb935ca92aa3c434
F Makefile.msc 02b469e9dcd5b7ee63fc1fb05babc174260ee4cfa4e0ef2e48c3c6801567a016 F Makefile.msc 02b469e9dcd5b7ee63fc1fb05babc174260ee4cfa4e0ef2e48c3c6801567a016
@ -114,7 +114,7 @@ F ext/fts5/fts5_test_tok.c ffd657dd67e7fcdb31bf63fb60b6d867299a581d0f46e97086aba
F ext/fts5/fts5_tokenize.c 2ce7b44183538ec46b7907726262ee43ffdd39a8 F ext/fts5/fts5_tokenize.c 2ce7b44183538ec46b7907726262ee43ffdd39a8
F ext/fts5/fts5_unicode2.c b450b209b157d598f7b9df9f837afb75a14c24bf F ext/fts5/fts5_unicode2.c b450b209b157d598f7b9df9f837afb75a14c24bf
F ext/fts5/fts5_varint.c a5aceacda04dafcbae725413d7a16818ecd65738 F ext/fts5/fts5_varint.c a5aceacda04dafcbae725413d7a16818ecd65738
F ext/fts5/fts5_vocab.c e44fefa7f0c1db252998af071daf06a7147e17e7 F ext/fts5/fts5_vocab.c 90783d59cb8ee29ae08ac7f7e1f9c04cc4fb3ffc46d34fedba96d145636dd39d
F ext/fts5/fts5parse.y a070b538e08ae9e2177d15c337ed2a3464408f0f886e746307098f746efd94ca F ext/fts5/fts5parse.y a070b538e08ae9e2177d15c337ed2a3464408f0f886e746307098f746efd94ca
F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba F ext/fts5/mkportersteps.tcl 5acf962d2e0074f701620bb5308155fa1e4a63ba
F ext/fts5/test/fts5_common.tcl b01c584144b5064f30e6c648145a2dd6bc440841 F ext/fts5/test/fts5_common.tcl b01c584144b5064f30e6c648145a2dd6bc440841
@ -1645,7 +1645,7 @@ F vsixtest/vsixtest.tcl 6a9a6ab600c25a91a7acc6293828957a386a8a93
F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc F vsixtest/vsixtest.vcxproj.data 2ed517e100c66dc455b492e1a33350c1b20fbcdc
F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e F vsixtest/vsixtest.vcxproj.filters 37e51ffedcdb064aad6ff33b6148725226cd608e
F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0 F vsixtest/vsixtest_TemporaryKey.pfx e5b1b036facdb453873e7084e1cae9102ccc67a0
P dcdf091388251292ff9939bdff920708320bc64dacfe0fa1878c5ffd11b679c9 P e49279e65169a939b6058a0960dc1fe09ce4ee2d78992a1969773cbc7ce1043b
R 089d3b3d49f7f300de0a7e207b239b0f R 51c4219f1ac2413928699169835fef62
U drh U dan
Z e2065e8cae31fe4aaa6c975ba17a09f2 Z c67de475a6759cd5bb3ba77ac9b27176

View File

@ -1 +1 @@
e49279e65169a939b6058a0960dc1fe09ce4ee2d78992a1969773cbc7ce1043b 34a7bd7121a478e14982d59be95ad891fab8050ad5e006638f826c57c392b93e