Add new test file fts5_test_mi.c, containing an implementation of a function similar to FTS4 matchinfo() for FTS5.

FossilOrigin-Name: 4f9520a9dc9c667b7fda5b0822de2bf48184ac99
This commit is contained in:
dan 2015-08-04 20:29:00 +00:00
parent dc6ef98742
commit d3789c0028
8 changed files with 426 additions and 22 deletions

View File

@ -415,6 +415,7 @@ TESTSRC += \
$(TOP)/ext/misc/fileio.c \
$(TOP)/ext/misc/fuzzer.c \
$(TOP)/ext/fts5/fts5_tcl.c \
$(TOP)/ext/fts5/fts5_test_mi.c \
$(TOP)/ext/misc/ieee754.c \
$(TOP)/ext/misc/nextchar.c \
$(TOP)/ext/misc/percentile.c \

View File

@ -1081,6 +1081,7 @@ TESTEXT = \
$(TOP)\ext\misc\fuzzer.c \
fts5.c \
$(TOP)\ext\fts5\fts5_tcl.c \
$(TOP)\ext\fts5\fts5_test_mi.c \
$(TOP)\ext\misc\ieee754.c \
$(TOP)\ext\misc\nextchar.c \
$(TOP)\ext\misc\percentile.c \

View File

@ -60,7 +60,10 @@ typedef void (*fts5_extension_function)(
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** xColumnCount(pFts, iCol, pnToken):
** xColumnCount(pFts):
** Return the number of columns in the table.
**
** xColumnSize(pFts, iCol, pnToken):
** If parameter iCol is less than zero, set output variable *pnToken
** to the total number of tokens in the current row. Or, if iCol is
** non-negative but less than the number of columns in the table, set
@ -71,9 +74,6 @@ typedef void (*fts5_extension_function)(
** an OOM condition or IO error), an appropriate SQLite error code is
** returned.
**
** xColumnSize:
** Reports the size in tokens of a column value from the current row.
**
** xColumnText:
** This function attempts to retrieve the text of column iCol of the
** current document. If successful, (*pz) is set to point to a buffer

View File

@ -946,6 +946,31 @@ static int f5tTokenHash(
return TCL_OK;
}
static int f5tRegisterMatchinfo(
void * clientData,
Tcl_Interp *interp,
int objc,
Tcl_Obj *CONST objv[]
){
int rc;
sqlite3 *db = 0;
if( objc!=2 ){
Tcl_WrongNumArgs(interp, 1, objv, "DB");
return TCL_ERROR;
}
if( f5tDbPointer(interp, objv[1], &db) ){
return TCL_ERROR;
}
rc = sqlite3Fts5TestRegisterMatchinfo(db);
if( rc!=SQLITE_OK ){
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_ERROR;
}
return TCL_OK;
}
/*
** Entry point.
*/
@ -955,12 +980,13 @@ int Fts5tcl_Init(Tcl_Interp *interp){
Tcl_ObjCmdProc *xProc;
int bTokenizeCtx;
} aCmd[] = {
{ "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
{ "sqlite3_fts5_token", f5tTokenizerReturn, 1 },
{ "sqlite3_fts5_tokenize", f5tTokenize, 0 },
{ "sqlite3_fts5_create_function", f5tCreateFunction, 0 },
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
{ "sqlite3_fts5_token_hash", f5tTokenHash, 0 }
{ "sqlite3_fts5_create_tokenizer", f5tCreateTokenizer, 1 },
{ "sqlite3_fts5_token", f5tTokenizerReturn, 1 },
{ "sqlite3_fts5_tokenize", f5tTokenize, 0 },
{ "sqlite3_fts5_create_function", f5tCreateFunction, 0 },
{ "sqlite3_fts5_may_be_corrupt", f5tMayBeCorrupt, 0 },
{ "sqlite3_fts5_token_hash", f5tTokenHash, 0 },
{ "sqlite3_fts5_register_matchinfo", f5tRegisterMatchinfo, 0 }
};
int i;
F5tTokenizerContext *pContext;

374
ext/fts5/fts5_test_mi.c Normal file
View File

@ -0,0 +1,374 @@
/*
** 2015 Aug 04
**
** The author disclaims copyright to this source code. In place of
** a legal notice, here is a blessing:
**
** May you do good and not evil.
** May you find forgiveness for yourself and forgive others.
** May you share freely, never taking more than you give.
**
******************************************************************************
**
** This file contains test code only, it is not included in release
** versions of FTS5. It contains the implementation of an FTS5 auxiliary
** function very similar to the FTS4 function matchinfo():
**
** https://www.sqlite.org/fts3.html#matchinfo
**
** Known differences are that:
**
** 1) this function uses the FTS5 definition of "matchable phrase", which
** excludes any phrases that are part of an expression sub-tree that
** does not match the current row. This comes up for MATCH queries
** such as:
**
** "a OR (b AND c)"
**
** In FTS4, if a single row contains instances of tokens "a" and "c",
** but not "b", all instances of "c" are considered matches. In FTS5,
** they are not (as the "b AND c" sub-tree does not match the current
** row.
**
** 2) ...
**
** This file exports a single function that may be called to register the
** matchinfo() implementation with a database handle:
**
** int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
*/
#ifdef SQLITE_TEST
#ifdef SQLITE_ENABLE_FTS5
#include "fts5.h"
#include <tcl.h>
#include <assert.h>
#include <string.h>
typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;
typedef unsigned int u32;
struct Fts5MatchinfoCtx {
int nCol; /* Number of cols in FTS5 table */
int nPhrase; /* Number of phrases in FTS5 query */
char *zArg; /* nul-term'd copy of 2nd arg */
int nRet; /* Number of elements in aRet[] */
u32 *aRet; /* Array of 32-bit unsigned ints to return */
};
/*
** Return a pointer to the fts5_api pointer for database connection db.
** If an error occurs, return NULL and leave an error in the database
** handle (accessible using sqlite3_errcode()/errmsg()).
*/
static fts5_api *fts5_api_from_db(sqlite3 *db){
fts5_api *pRet = 0;
sqlite3_stmt *pStmt = 0;
if( SQLITE_OK==sqlite3_prepare(db, "SELECT fts5()", -1, &pStmt, 0)
&& SQLITE_ROW==sqlite3_step(pStmt)
&& sizeof(pRet)==sqlite3_column_bytes(pStmt, 0)
){
memcpy(&pRet, sqlite3_column_blob(pStmt, 0), sizeof(pRet));
}
sqlite3_finalize(pStmt);
return pRet;
}
/*
** Argument f should be a flag accepted by matchinfo() (a valid character
** in the string passed as the second argument). If it is not, 0 is
** returned. Otherwise, if f is a valid matchinfo flag, the value returned
** is the number of 32-bit integers added to the output array if the
** table has nCol columns and the query nPhrase phrases.
*/
static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
int ret = 0;
switch( f ){
case 'p': ret = 1; break;
case 'c': ret = 1; break;
case 'x': ret = 3 * nCol * nPhrase; break;
case 'y': ret = nCol * nPhrase; break;
case 'b': ret = ((nCol + 31) / 32) * nPhrase; break;
case 'n': ret = 1; break;
case 'a': ret = nCol; break;
case 'l': ret = nCol; break;
case 's': ret = nCol; break;
}
return ret;
}
static int fts5MatchinfoIter(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
Fts5MatchinfoCtx *p,
int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
){
int i;
int n = 0;
int rc = SQLITE_OK;
char f;
for(i=0; (f = p->zArg[i]); i++){
rc = x(pApi, pFts, p, f, &p->aRet[n]);
if( rc!=SQLITE_OK ) break;
n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
}
return rc;
}
static int fts5MatchinfoXCb(
const Fts5ExtensionApi *pApi,
Fts5Context *pFts,
void *pUserData
){
u32 *aOut = (u32*)pUserData;
int nCol = pApi->xColumnCount(pFts);
int nInst;
int iPrev = -1;
int rc;
int i;
rc = pApi->xInstCount(pFts, &nInst);
for(i=0; rc==SQLITE_OK && i<nInst; i++){
int iPhrase, iCol, iOff;
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
aOut[iCol*3 + 1]++;
if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
iPrev = iCol;
}
return rc;
}
static int fts5MatchinfoGlobalCb(
const Fts5ExtensionApi *pApi,
Fts5Context *pFts,
Fts5MatchinfoCtx *p,
char f,
u32 *aOut
){
int rc = SQLITE_OK;
switch( f ){
case 'p':
aOut[0] = p->nPhrase;
break;
case 'c':
aOut[0] = p->nCol;
break;
case 'x': {
int i;
for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
void *pPtr = (void*)&aOut[i * p->nCol * 3];
rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
}
break;
}
case 'n': {
sqlite3_int64 nRow;
rc = pApi->xRowCount(pFts, &nRow);
aOut[0] = (u32)nRow;
break;
}
case 'a': {
sqlite3_int64 nRow = 0;
rc = pApi->xRowCount(pFts, &nRow);
if( nRow==0 ){
memset(aOut, 0, sizeof(u32) * p->nCol);
}else{
int i;
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
sqlite3_int64 nToken;
rc = pApi->xColumnTotalSize(pFts, i, &nToken);
if( rc==SQLITE_OK){
aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
}
}
}
break;
}
}
return rc;
}
static int fts5MatchinfoLocalCb(
const Fts5ExtensionApi *pApi,
Fts5Context *pFts,
Fts5MatchinfoCtx *p,
char f,
u32 *aOut
){
int i;
int rc = SQLITE_OK;
switch( f ){
case 'b':
case 'x':
case 'y': {
int nInst;
int nMul = (f=='x' ? 3 : 1);
if( f=='b' ){
int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
for(i=0; i<nInt; i++) aOut[i] = 0;
}else{
for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;
}
rc = pApi->xInstCount(pFts, &nInst);
for(i=0; rc==SQLITE_OK && i<nInst; i++){
int iPhrase, iOff, iCol = 0;
rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
if( f=='b' ){
aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << (iCol%32));
}else{
aOut[nMul * (iCol + iPhrase * p->nCol)]++;
}
}
break;
}
case 'l': {
for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
int nToken;
rc = pApi->xColumnSize(pFts, i, &nToken);
aOut[i] = (u32)nToken;
}
break;
}
case 's':
memset(aOut, 0, sizeof(u32) * p->nCol);
break;
}
return rc;
}
static Fts5MatchinfoCtx *fts5MatchinfoNew(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning error message */
const char *zArg /* Matchinfo flag string */
){
Fts5MatchinfoCtx *p;
int nCol;
int nPhrase;
int i;
int nInt;
int nByte;
int rc;
nCol = pApi->xColumnCount(pFts);
nPhrase = pApi->xPhraseCount(pFts);
nInt = 0;
for(i=0; zArg[i]; i++){
int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
if( n==0 ){
char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
sqlite3_result_error(pCtx, zErr, -1);
sqlite3_free(zErr);
return 0;
}
nInt += n;
}
nByte = sizeof(Fts5MatchinfoCtx) /* The struct itself */
+ sizeof(u32) * nInt /* The p->aRet[] array */
+ (i+1); /* The p->zArg string */
p = (Fts5MatchinfoCtx*)sqlite3_malloc(nByte);
if( p==0 ){
sqlite3_result_error_nomem(pCtx);
return 0;
}
memset(p, 0, nByte);
p->nCol = nCol;
p->nPhrase = nPhrase;
p->aRet = (u32*)&p[1];
p->nRet = nInt;
p->zArg = (char*)&p->aRet[nInt];
memcpy(p->zArg, zArg, i);
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
sqlite3_free(p);
p = 0;
}
return p;
}
static void fts5MatchinfoFunc(
const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
Fts5Context *pFts, /* First arg to pass to pApi functions */
sqlite3_context *pCtx, /* Context for returning result/error */
int nVal, /* Number of values in apVal[] array */
sqlite3_value **apVal /* Array of trailing arguments */
){
const char *zArg;
Fts5MatchinfoCtx *p;
int rc;
if( nVal>0 ){
zArg = (const char*)sqlite3_value_text(apVal[0]);
}else{
zArg = "pcx";
}
p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
pApi->xSetAuxdata(pFts, p, sqlite3_free);
if( p==0 ) return;
}
rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
if( rc!=SQLITE_OK ){
sqlite3_result_error_code(pCtx, rc);
}else{
/* No errors has occured, so return a copy of the array of integers. */
int nByte = p->nRet * sizeof(u32);
sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
}
}
int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
int rc; /* Return code */
fts5_api *pApi; /* FTS5 API functions */
/* Extract the FTS5 API pointer from the database handle. The
** fts5_api_from_db() function above is copied verbatim from the
** FTS5 documentation. Refer there for details. */
pApi = fts5_api_from_db(db);
/* If fts5_api_from_db() returns NULL, then either FTS5 is not registered
** with this database handle, or an error (OOM perhaps?) has occurred.
**
** Also check that the fts5_api object is version 1 or newer (there
** is no actual version of FTS5 that would return an API object of version
** 0, but FTS5 extensions should check the API version before using it). */
if( pApi==0 || pApi->iVersion<1 ){
return SQLITE_ERROR;
}
/* Register the implementation of matchinfo() */
rc = pApi->xCreateFunction(pApi, "matchinfo", 0, fts5MatchinfoFunc, 0);
return rc;
}
#endif /* SQLITE_ENABLE_FTS5 */
#endif /* SQLITE_TEST */

View File

@ -305,6 +305,7 @@ TESTSRC += \
$(TOP)/ext/misc/wholenumber.c \
$(TOP)/ext/misc/vfslog.c \
$(TOP)/ext/fts5/fts5_tcl.c \
$(TOP)/ext/fts5/fts5_test_mi.c \
fts5.c

View File

@ -1,9 +1,9 @@
C Improve\sthe\susage\scomment\son\ssqlite3_analyzer:\sshow\sthe\savailable\sswitches.
D 2015-08-04T19:06:50.512
C Add\snew\stest\sfile\sfts5_test_mi.c,\scontaining\san\simplementation\sof\sa\sfunction\ssimilar\sto\sFTS4\smatchinfo()\sfor\sFTS5.
D 2015-08-04T20:29:00.335
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in 4de3ef40c8b3b75c0c55ff4242a43c8ce1ad90ee
F Makefile.in 2fc9ca6bf5949d415801c007ed3004a4bdb7c380
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
F Makefile.msc 3c258b2f98adb08c7ca5950bee2d6670d6ee3f9a
F Makefile.msc 5f7861c62c41fe8e3205ef14b90ebed28fa21f1b
F Makefile.vxworks e1b65dea203f054e71653415bd8f96dcaed47858
F README.md 8ecc12493ff9f820cdea6520a9016001cb2e59b7
F VERSION 02caf9f357b853703c26f259c94cdc95033356a6
@ -105,7 +105,7 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl 06583c935f89075ea0b32f85efa5dd7619fcbd03
F ext/fts5/fts5.h 65b8b308135ac6957fcb0f41b85240212e41099c
F ext/fts5/fts5.h 458a044344e96a7a3df38839f756aee105829303
F ext/fts5/fts5Int.h 4d669e2ef0f8d51380c78403fd310ee69ce0f70e
F ext/fts5/fts5_aux.c 044cb176a815f4388308738437f6e130aa384fb0
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
@ -115,7 +115,8 @@ F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c f5b25da3a2eef71f2024a08323a1575eb55f7aad
F ext/fts5/fts5_main.c 4518fa10947f683f0963f7802559c69ec923d489
F ext/fts5/fts5_storage.c 22ec9b5d35a39e2b5b65daf4ba7cd47fbb2d0df5
F ext/fts5/fts5_tcl.c 85eb4e0d0fefa9420b78151496ad4599a1783e20
F ext/fts5/fts5_tcl.c fac2c0a30e708696bd5130324968eef9021c0235
F ext/fts5/fts5_test_mi.c a11a5f262fb3e36f943ce008933528c88f1520ca
F ext/fts5/fts5_tokenize.c 2836f6728bd74c7efac7487f5d9c27ca3e1b509c
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
@ -251,7 +252,7 @@ F ext/userauth/userauth.c 5fa3bdb492f481bbc1709fc83c91ebd13460c69e
F install-sh 9d4de14ab9fb0facae2f48780b874848cbf2f895 x
F ltmain.sh 3ff0879076df340d2e23ae905484d8c15d5fdea8
F magic.txt 8273bf49ba3b0c8559cb2774495390c31fd61c60
F main.mk 538244d18f37f30a45da813a7bc43294e6fda8cc
F main.mk 73167b34b0e67c0be32c1da2d988a376851c9ab1
F mkopcodec.awk c2ff431854d702cdd2d779c9c0d1f58fa16fa4ea
F mkopcodeh.awk 0e7f04a8eb90f92259e47d80110e4e98d7ce337a
F mkso.sh fd21c06b063bb16a5d25deea1752c2da6ac3ed83
@ -1368,7 +1369,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P e3c6d4b6e738c7ea015c0c809a5f7d1a94dda945
R 9d1259ae9dfc5b130a73da5571ccf238
U drh
Z c3498853cb626c1a563db294f9147107
P 783f78e39795b2c491c342558ef59f1fc32c2858
R 0c896256c199153266774cd859a08c17
U dan
Z 531b537f6347f0c936b5830c8d118577

View File

@ -1 +1 @@
783f78e39795b2c491c342558ef59f1fc32c2858
4f9520a9dc9c667b7fda5b0822de2bf48184ac99