Add the xPhraseFirst() and xPhraseNext() fts5 APIs, for faster iteration through a single phrase's position list. Also optimize xInst() and xInstCount() a bit.

FossilOrigin-Name: f7682435278419829a46bb4cc9b5625d46549e22
This commit is contained in:
dan 2015-08-12 12:11:28 +00:00
parent f053ceb9c1
commit 7918dc835b
5 changed files with 127 additions and 43 deletions

@ -32,6 +32,7 @@
typedef struct Fts5ExtensionApi Fts5ExtensionApi;
typedef struct Fts5Context Fts5Context;
typedef struct Fts5PhraseIter Fts5PhraseIter;
typedef void (*fts5_extension_function)(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
@ -41,6 +42,11 @@ typedef void (*fts5_extension_function)(
sqlite3_value **apVal /* Array of trailing arguments */
);
/*
** Iterator state used by the xPhraseFirst() and xPhraseNext() extension
** API methods to walk the position list of a single query phrase.
** Callers should treat the fields as opaque and only pass the structure
** to those two methods.
*/
struct Fts5PhraseIter {
/* Current read position within the position-list buffer. Advanced by
** xPhraseNext() as entries are decoded. */
const unsigned char *a;
/* End marker for the buffer: iteration is finished once a>=b. */
const unsigned char *b;
};
/*
** EXTENSION API FUNCTIONS
**
@ -174,6 +180,30 @@ typedef void (*fts5_extension_function)(
** In other words, the same value that would be returned by:
**
** SELECT count(*) FROM ftstable;
**
** xPhraseFirst()
** This function is used, along with type Fts5PhraseIter and the xPhraseNext
** method, to iterate through all instances of a single query phrase within
** the current row. This is the same information as is accessible via the
** xInstCount/xInst APIs. While the xInstCount/xInst APIs are more convenient
** to use, this API may be faster under some circumstances. To iterate
** through instances of phrase iPhrase, use the following code:
**
** Fts5PhraseIter iter;
** int iCol, iOff;
** for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
** iOff>=0;
** pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
** ){
** // An instance of phrase iPhrase at offset iOff of column iCol
** }
**
** The Fts5PhraseIter structure is defined above. Applications should not
** modify this structure directly - it should only be used as shown above
** with the xPhraseFirst() and xPhraseNext() API methods.
**
** xPhraseNext()
** See xPhraseFirst above.
*/
struct Fts5ExtensionApi {
int iVersion; /* Currently always set to 1 */
@ -205,6 +235,9 @@ struct Fts5ExtensionApi {
);
int (*xSetAuxdata)(Fts5Context*, void *pAux, void(*xDelete)(void*));
void *(*xGetAuxdata)(Fts5Context*, int bClear);
void (*xPhraseFirst)(Fts5Context*, int iPhrase, Fts5PhraseIter*, int*, int*);
void (*xPhraseNext)(Fts5Context*, Fts5PhraseIter*, int *piCol, int *piOff);
};
/*

@ -197,6 +197,8 @@ struct Fts5Cursor {
Fts5Auxdata *pAuxdata; /* First in linked list of saved aux-data */
/* Cache used by auxiliary functions xInst() and xInstCount() */
Fts5PoslistReader *aInstIter; /* One for each phrase */
int nInstAlloc; /* Size of aInst[] array (entries / 3) */
int nInstCount; /* Number of phrase instances */
int *aInst; /* 3 integers per phrase instance */
};
@ -617,6 +619,7 @@ static void fts5FreeCursorComponents(Fts5Cursor *pCsr){
Fts5Auxdata *pData;
Fts5Auxdata *pNext;
sqlite3_free(pCsr->aInstIter);
sqlite3_free(pCsr->aInst);
if( pCsr->pStmt ){
int eStmt = fts5StmtType(pCsr);
@ -1535,13 +1538,15 @@ static int fts5CacheInstArray(Fts5Cursor *pCsr){
if( CsrFlagTest(pCsr, FTS5CSR_REQUIRE_INST) ){
Fts5PoslistReader *aIter; /* One iterator for each phrase */
int nIter; /* Number of iterators/phrases */
int nByte;
nIter = sqlite3Fts5ExprPhraseCount(pCsr->pExpr);
nByte = sizeof(Fts5PoslistReader) * nIter;
aIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
if( pCsr->aInstIter==0 ){
int nByte = sizeof(Fts5PoslistReader) * nIter;
pCsr->aInstIter = (Fts5PoslistReader*)sqlite3Fts5MallocZero(&rc, nByte);
}
aIter = pCsr->aInstIter;
if( aIter ){
Fts5Buffer buf = {0, 0, 0}; /* Build up aInst[] here */
int nInst = 0; /* Number instances seen so far */
int i;
@ -1562,22 +1567,30 @@ static int fts5CacheInstArray(Fts5Cursor *pCsr){
iBest = i;
}
}
if( iBest<0 ) break;
nInst++;
if( sqlite3Fts5BufferGrow(&rc, &buf, nInst * sizeof(int) * 3) ) break;
aInst = &((int*)buf.p)[3 * (nInst-1)];
nInst++;
if( nInst>=pCsr->nInstAlloc ){
pCsr->nInstAlloc = pCsr->nInstAlloc ? pCsr->nInstAlloc*2 : 32;
aInst = (int*)sqlite3_realloc(
pCsr->aInst, pCsr->nInstAlloc*sizeof(int)*3
);
if( aInst ){
pCsr->aInst = aInst;
}else{
rc = SQLITE_NOMEM;
break;
}
}
aInst = &pCsr->aInst[3 * (nInst-1)];
aInst[0] = iBest;
aInst[1] = FTS5_POS2COLUMN(aIter[iBest].iPos);
aInst[2] = FTS5_POS2OFFSET(aIter[iBest].iPos);
sqlite3Fts5PoslistReaderNext(&aIter[iBest]);
}
sqlite3_free(pCsr->aInst);
pCsr->aInst = (int*)buf.p;
pCsr->nInstCount = nInst;
sqlite3_free(aIter);
CsrFlagClear(pCsr, FTS5CSR_REQUIRE_INST);
}
}
@ -1757,12 +1770,47 @@ static void *fts5ApiGetAuxdata(Fts5Context *pCtx, int bClear){
return pRet;
}
/*
** Implementation of the xPhraseNext() extension API method.
**
** Advance iterator pIter to the next entry of the position list it is
** reading and report that entry through (*piCol) and (*piOff).
**
** If the read pointer has already reached the end marker (pIter->a>=pIter->b),
** both (*piCol) and (*piOff) are set to -1 and the iterator is left
** untouched. Callers detect end-of-iteration by testing for (*piOff)<0.
**
** Otherwise one value is decoded from the list. A value of 1 introduces a
** column switch: the following value is stored in (*piCol), (*piOff) is
** reset to 0 and a further value is decoded. In all cases the last value
** decoded, less 2, is then added to (*piOff).
**
** The pCtx argument is not used by this function.
*/
static void fts5ApiPhraseNext(
  Fts5Context *pCtx,
  Fts5PhraseIter *pIter,
  int *piCol, int *piOff
){
  int iVal;                       /* Most recently decoded value */

  /* Nothing left to read: signal end-of-iteration and return early. */
  if( pIter->a>=pIter->b ){
    *piOff = -1;
    *piCol = -1;
    return;
  }

  /* fts5GetVarint32() stores the decoded value in iVal; its result is used
  ** here as the number of bytes to step the read pointer forward by. */
  pIter->a += fts5GetVarint32(pIter->a, iVal);

  if( iVal==1 ){
    /* Column switch: read the new column, restart the offset at zero,
    ** then read the first offset delta for that column. */
    pIter->a += fts5GetVarint32(pIter->a, iVal);
    *piCol = iVal;
    *piOff = 0;
    pIter->a += fts5GetVarint32(pIter->a, iVal);
  }

  /* Offsets are accumulated as deltas, each stored with a bias of 2. */
  *piOff += (iVal-2);
}
/*
** Implementation of the xPhraseFirst() extension API method.
**
** Point iterator pIter at the position list for phrase iPhrase of the
** current row of the cursor identified by pCtx, then load the first entry
** into (*piCol) and (*piOff) by calling fts5ApiPhraseNext(). If the list
** is empty, fts5ApiPhraseNext() sets both outputs to -1.
**
** The position list itself is obtained from fts5CsrPoslist(), which
** stores a pointer to the data in pIter->a and returns its size in bytes.
*/
static void fts5ApiPhraseFirst(
  Fts5Context *pCtx,
  int iPhrase,
  Fts5PhraseIter *pIter,
  int *piCol, int *piOff
){
  Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
  int n;

  /* Start from a known state in case the helper leaves the output pointer
  ** unset when there is no position list.
  ** NOTE(review): fts5CsrPoslist() is not visible here - confirm whether
  ** it always writes the output pointer. */
  pIter->a = 0;
  n = fts5CsrPoslist(pCsr, iPhrase, &pIter->a);

  /* Only compute an end pointer past the start when there is real data.
  ** Doing pointer arithmetic on a NULL buffer is undefined behaviour, and
  ** a non-positive size must never place the end marker before the start.
  ** With b==a the iterator is immediately at end-of-list. */
  if( pIter->a!=0 && n>0 ){
    pIter->b = &pIter->a[n];
  }else{
    pIter->b = pIter->a;
  }

  *piCol = 0;
  *piOff = 0;
  fts5ApiPhraseNext(pCtx, pIter, piCol, piOff);
}
static int fts5ApiQueryPhrase(Fts5Context*, int, void*,
int(*)(const Fts5ExtensionApi*, Fts5Context*, void*)
);
static const Fts5ExtensionApi sFts5Api = {
1, /* iVersion */
2, /* iVersion */
fts5ApiUserData,
fts5ApiColumnCount,
fts5ApiRowCount,
@ -1778,6 +1826,8 @@ static const Fts5ExtensionApi sFts5Api = {
fts5ApiQueryPhrase,
fts5ApiSetAuxdata,
fts5ApiGetAuxdata,
fts5ApiPhraseFirst,
fts5ApiPhraseNext,
};

@ -128,23 +128,21 @@ static int fts5MatchinfoXCb(
Fts5Context *pFts,
void *pUserData
){
Fts5PhraseIter iter;
int iCol, iOff;
u32 *aOut = (u32*)pUserData;
int nCol = pApi->xColumnCount(pFts);
int nInst;
int iPrev = -1;
int rc;
int i;
rc = pApi->xInstCount(pFts, &nInst);
for(i=0; rc==SQLITE_OK && i<nInst; i++){
int iPhrase, iCol, iOff;
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
aOut[iCol*3 + 1]++;
for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff);
iOff>=0;
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
){
aOut[iCol*3+1]++;
if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
iPrev = iCol;
}
return rc;
return SQLITE_OK;
}
static int fts5MatchinfoGlobalCb(
@ -216,8 +214,8 @@ static int fts5MatchinfoLocalCb(
case 'b':
case 'x':
case 'y': {
int nInst;
int nMul = (f=='x' ? 3 : 1);
int iPhrase;
if( f=='b' ){
int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
@ -226,14 +224,18 @@ static int fts5MatchinfoLocalCb(
for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;
}
rc = pApi->xInstCount(pFts, &nInst);
for(i=0; rc==SQLITE_OK && i<nInst; i++){
int iPhrase, iOff, iCol = 0;
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
if( f=='b' ){
aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << (iCol%32));
}else{
aOut[nMul * (iCol + iPhrase * p->nCol)]++;
for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
Fts5PhraseIter iter;
int iOff, iCol;
for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
iOff>=0;
pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
){
if( f=='b' ){
aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
}else{
aOut[nMul * (iCol + iPhrase * p->nCol)]++;
}
}
}
@ -387,9 +389,8 @@ int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
/* If fts5_api_from_db() returns NULL, then either FTS5 is not registered
** with this database handle, or an error (OOM perhaps?) has occurred.
**
** Also check that the fts5_api object is version 1 or newer (there
** is no actual version of FTS5 that would return an API object of version
** 0, but FTS5 extensions should check the API version before using it). */
** Also check that the fts5_api object is version 2 or newer.
*/
if( pApi==0 || pApi->iVersion<1 ){
return SQLITE_ERROR;
}

@ -1,5 +1,5 @@
C Merge\sfixes\sfrom\sthe\sfts5NoWarn\sbranch.
D 2015-08-11T14:25:34.036
C Add\sthe\sxPhraseFirst()\sand\sxPhraseNext()\sfts5\sAPIs,\sfor\sfaster\siteration\sthrough\sa\ssingle\sphrases\sposition\slist.\sAlso\soptimize\sxInst()\sand\sxInstCount()\sa\sbit.
D 2015-08-12T12:11:28.744
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 2fc9ca6bf5949d415801c007ed3004a4bdb7c380
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -105,7 +105,7 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl 06583c935f89075ea0b32f85efa5dd7619fcbd03
F ext/fts5/fts5.h 458a044344e96a7a3df38839f756aee105829303
F ext/fts5/fts5.h 1950ec0544de667a24c1d8af9b2fde5db7db3bc9
F ext/fts5/fts5Int.h 45f2ceb3c030f70e2cc4c199e9f700c2f2367f77
F ext/fts5/fts5_aux.c 044cb176a815f4388308738437f6e130aa384fb0
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
@ -113,10 +113,10 @@ F ext/fts5/fts5_config.c fdfa63ae8e527ecfaa50f94063c610429cc887cf
F ext/fts5/fts5_expr.c 495b24f47f4d71b63339572a5beaf9f6e1b486fe
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c 076c4995bf06a6d1559a6e31f9a86b90f2105374
F ext/fts5/fts5_main.c 4c8af0015aaf1db2c81df4f617840a921360ef50
F ext/fts5/fts5_main.c c5ff6eb7de5fe8e062b54bbee2b1936901533685
F ext/fts5/fts5_storage.c 22ec9b5d35a39e2b5b65daf4ba7cd47fbb2d0df5
F ext/fts5/fts5_tcl.c 96a3b9e982c4a64a242eefd752fa6669cd405a67
F ext/fts5/fts5_test_mi.c c42a34590d9393d2aa0b959398261810ca976d05
F ext/fts5/fts5_test_mi.c 80a9e86fb4c5b6b58f8fefac05e9b96d1a6574e1
F ext/fts5/fts5_tokenize.c 2836f6728bd74c7efac7487f5d9c27ca3e1b509c
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
@ -1372,7 +1372,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P fd5608fb20831f1f1946c8941445b7acc463a143 0ddb2532b2daaaf1b0109ac360822f84cb999b7f
R f656c6f9b0efdad10e9de9eeca5d30e2
P 61cb2fc6c12810863c965c74e90bc502e20cf810
R d3b6dadb6d0d16c8d1ad296c14b96300
U dan
Z a8d5c370bccbd577084eb4f30439c21a
Z 2a4c5177ad3c19913de9e61965a0ba07

@ -1 +1 @@
61cb2fc6c12810863c965c74e90bc502e20cf810
f7682435278419829a46bb4cc9b5625d46549e22