Another change to the fts5 tokenizer API.

FossilOrigin-Name: fc71868496f45f9c7a79ed2bf2d164a7c4718ce1
This commit is contained in:
dan 2015-08-29 15:44:27 +00:00
parent 57e0add3f9
commit ee0c0a8de3
11 changed files with 70 additions and 61 deletions

View File

@ -217,7 +217,7 @@ struct Fts5ExtensionApi {
int (*xTokenize)(Fts5Context*,
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);
int (*xPhraseCount)(Fts5Context*);
@ -313,20 +313,25 @@ struct fts5_tokenizer {
const char *pText, int nText,
int (*xToken)(
void *pCtx, /* Copy of 2nd argument to xTokenize() */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Pointer to buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Byte offset of token within input text */
int iEnd, /* Byte offset of end of token within input text */
int iPos /* Number of tokens before this one in input text */
int iEnd /* Byte offset of end of token within input text */
)
);
};
/* Flags that may be passed as the third argument to xTokenize() */
#define FTS5_TOKENIZE_QUERY 0x0001
#define FTS5_TOKENIZE_PREFIX 0x0002
#define FTS5_TOKENIZE_DOCUMENT 0x0004
#define FTS5_TOKENIZE_AUX 0x0008
/* Flags that may be passed by the tokenizer implementation back to FTS5
** as the second argument to the supplied xToken callback. */
#define FTS5_TOKEN_COLOCATED 0x0001 /* Same position as prev. token */
/*
** END OF CUSTOM TOKENIZERS
*************************************************************************/

View File

@ -169,7 +169,7 @@ int sqlite3Fts5Tokenize(
int flags, /* FTS5_TOKENIZE_* flags */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
);
void sqlite3Fts5Dequote(char *z);

View File

@ -148,17 +148,18 @@ static void fts5HighlightAppend(
*/
static int fts5HighlightCb(
void *pContext, /* Pointer to HighlightContext object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStartOff, /* Start offset of token */
int iEndOff, /* End offset of token */
int iPos
int iEndOff /* End offset of token */
){
HighlightContext *p = (HighlightContext*)pContext;
int rc = SQLITE_OK;
int iPos;
if( iPos<p->iPos ) return SQLITE_OK;
p->iPos = iPos+1;
if( tflags & FTS5_TOKEN_COLOCATED ) return SQLITE_OK;
iPos = p->iPos++;
if( p->iRangeEnd>0 ){
if( iPos<p->iRangeStart || iPos>p->iRangeEnd ) return SQLITE_OK;

View File

@ -648,7 +648,7 @@ int sqlite3Fts5Tokenize(
int flags, /* FTS5_TOKENIZE_* flags */
const char *pText, int nText, /* Text to tokenize */
void *pCtx, /* Context passed to xToken() */
int (*xToken)(void*, const char*, int, int, int, int) /* Callback */
int (*xToken)(void*, int, const char*, int, int, int) /* Callback */
){
if( pText==0 ) return SQLITE_OK;
return pConfig->pTokApi->xTokenize(

View File

@ -1338,11 +1338,11 @@ struct TokenCtx {
*/
static int fts5ParseTokenize(
void *pContext, /* Pointer to TokenCtx object */
int tflags, /* Mask of FTS5_TOKEN_* flags */
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos
int iEnd /* End offset of token */
){
int rc = SQLITE_OK;
const int SZALLOC = 8;
@ -1350,6 +1350,8 @@ static int fts5ParseTokenize(
Fts5ExprPhrase *pPhrase = pCtx->pPhrase;
Fts5ExprTerm *pTerm;
if( tflags & FTS5_TOKEN_COLOCATED ) return rc;
if( pPhrase==0 || (pPhrase->nTerm % SZALLOC)==0 ){
Fts5ExprPhrase *pNew;
int nNew = SZALLOC + (pPhrase ? pPhrase->nTerm : 0);

View File

@ -1498,7 +1498,7 @@ static int fts5ApiTokenize(
Fts5Context *pCtx,
const char *pText, int nText,
void *pUserData,
int (*xToken)(void*, const char*, int, int, int, int)
int (*xToken)(void*, int, const char*, int, int, int)
){
Fts5Cursor *pCsr = (Fts5Cursor*)pCtx;
Fts5Table *pTab = (Fts5Table*)(pCsr->base.pVtab);
@ -1657,14 +1657,16 @@ static int fts5ApiColumnText(
static int fts5ColumnSizeCb(
void *pContext, /* Pointer to int */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos
int iEnd /* End offset of token */
){
int *pCnt = (int*)pContext;
*pCnt = iPos+1;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
(*pCnt)++;
}
return SQLITE_OK;
}

View File

@ -359,17 +359,18 @@ struct Fts5InsertCtx {
*/
static int fts5StorageInsertCallback(
void *pContext, /* Pointer to Fts5InsertCtx object */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos
int iEnd /* End offset of token */
){
Fts5InsertCtx *pCtx = (Fts5InsertCtx*)pContext;
Fts5Index *pIdx = pCtx->pStorage->pIndex;
assert( iPos+1>=pCtx->szCol );
pCtx->szCol = iPos+1;
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, iPos, pToken, nToken);
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
pCtx->szCol++;
}
return sqlite3Fts5IndexWrite(pIdx, pCtx->iCol, pCtx->szCol-1, pToken, nToken);
}
/*
@ -844,17 +845,18 @@ struct Fts5IntegrityCtx {
*/
static int fts5StorageIntegrityCallback(
void *pContext, /* Pointer to Fts5IntegrityCtx object */
int tflags,
const char *pToken, /* Buffer containing token */
int nToken, /* Size of token in bytes */
int iStart, /* Start offset of token */
int iEnd, /* End offset of token */
int iPos
int iEnd /* End offset of token */
){
Fts5IntegrityCtx *pCtx = (Fts5IntegrityCtx*)pContext;
assert( iPos+1>=pCtx->szCol );
pCtx->szCol = iPos+1;
if( (tflags & FTS5_TOKEN_COLOCATED)==0 ){
pCtx->szCol++;
}
pCtx->cksum ^= sqlite3Fts5IndexCksum(
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, iPos, pToken, nToken
pCtx->pConfig, pCtx->iRowid, pCtx->iCol, pCtx->szCol-1, pToken, nToken
);
return SQLITE_OK;
}

View File

@ -141,8 +141,9 @@ struct F5tAuxData {
static int xTokenizeCb(
void *pCtx,
int tflags,
const char *zToken, int nToken,
int iStart, int iEnd, int iPos
int iStart, int iEnd
){
F5tFunction *p = (F5tFunction*)pCtx;
Tcl_Obj *pEval = Tcl_DuplicateObj(p->pScript);
@ -584,8 +585,9 @@ struct F5tTokenizeCtx {
static int xTokenizeCb2(
void *pCtx,
int tflags,
const char *zToken, int nToken,
int iStart, int iEnd, int iPos
int iStart, int iEnd
){
F5tTokenizeCtx *p = (F5tTokenizeCtx*)pCtx;
if( p->bSubst ){
@ -694,7 +696,7 @@ typedef struct F5tTokenizerModule F5tTokenizerInstance;
struct F5tTokenizerContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int);
int (*xToken)(void*, int, const char*, int, int, int);
};
struct F5tTokenizerModule {
@ -752,11 +754,11 @@ static int f5tTokenizerTokenize(
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int, int, int, int)
int (*xToken)(void*, int, const char*, int, int, int)
){
F5tTokenizerInstance *pInst = (F5tTokenizerInstance*)p;
void *pOldCtx;
int (*xOldToken)(void*, const char*, int, int, int);
int (*xOldToken)(void*, int, const char*, int, int, int);
Tcl_Obj *pEval;
int rc;
@ -813,7 +815,7 @@ static int f5tTokenizerReturn(
return TCL_ERROR;
}
rc = p->xToken(p->pCtx, zToken, nToken, iStart, iEnd);
rc = p->xToken(p->pCtx, 0, zToken, nToken, iStart, iEnd);
Tcl_SetResult(interp, (char*)sqlite3ErrName(rc), TCL_VOLATILE);
return TCL_OK;
}

View File

@ -118,13 +118,12 @@ static int fts5AsciiTokenize(
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
AsciiTokenizer *p = (AsciiTokenizer*)pTokenizer;
int rc = SQLITE_OK;
int ie;
int is = 0;
int iPos = 0;
char aFold[64];
int nFold = sizeof(aFold);
@ -160,7 +159,7 @@ static int fts5AsciiTokenize(
asciiFold(pFold, &pText[is], nByte);
/* Invoke the token callback */
rc = xToken(pCtx, pFold, nByte, is, ie, iPos++);
rc = xToken(pCtx, 0, pFold, nByte, is, ie);
is = ie+1;
}
@ -389,12 +388,11 @@ static int fts5UnicodeTokenize(
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
Unicode61Tokenizer *p = (Unicode61Tokenizer*)pTokenizer;
int rc = SQLITE_OK;
unsigned char *a = p->aTokenChar;
int iPos = 0;
unsigned char *zTerm = (unsigned char*)&pText[nText];
unsigned char *zCsr = (unsigned char *)pText;
@ -479,7 +477,7 @@ static int fts5UnicodeTokenize(
}
/* Invoke the token callback */
rc = xToken(pCtx, aFold, zOut-aFold, is, ie, iPos++);
rc = xToken(pCtx, 0, aFold, zOut-aFold, is, ie);
}
tokenize_done:
@ -557,7 +555,7 @@ static int fts5PorterCreate(
typedef struct PorterContext PorterContext;
struct PorterContext {
void *pCtx;
int (*xToken)(void*, const char*, int, int, int, int);
int (*xToken)(void*, int, const char*, int, int, int);
char *aBuf;
};
@ -1122,11 +1120,11 @@ static void fts5PorterStep1A(char *aBuf, int *pnBuf){
static int fts5PorterCb(
void *pCtx,
int tflags,
const char *pToken,
int nToken,
int iStart,
int iEnd,
int iPos
int iEnd
){
PorterContext *p = (PorterContext*)pCtx;
@ -1180,10 +1178,10 @@ static int fts5PorterCb(
nBuf--;
}
return p->xToken(p->pCtx, aBuf, nBuf, iStart, iEnd, iPos);
return p->xToken(p->pCtx, tflags, aBuf, nBuf, iStart, iEnd);
pass_through:
return p->xToken(p->pCtx, pToken, nToken, iStart, iEnd, iPos);
return p->xToken(p->pCtx, tflags, pToken, nToken, iStart, iEnd);
}
/*
@ -1194,7 +1192,7 @@ static int fts5PorterTokenize(
void *pCtx,
int flags,
const char *pText, int nText,
int (*xToken)(void*, const char*, int nToken, int iStart, int iEnd, int iPos)
int (*xToken)(void*, int, const char*, int nToken, int iStart, int iEnd)
){
PorterTokenizer *p = (PorterTokenizer*)pTokenizer;
PorterContext sCtx;

View File

@ -1,5 +1,5 @@
C Change\sthe\sfts5\stokenizer\sAPI\sto\sallow\smore\sthan\sone\stoken\sto\soccupy\sa\ssingle\sposition\swithin\sa\sdocument.
D 2015-08-28T19:56:47.300
C Another\schange\sto\sthe\sfts5\stokenizer\sAPI.
D 2015-08-29T15:44:27.938
F Makefile.arm-wince-mingw32ce-gcc d6df77f1f48d690bd73162294bbba7f59507c72f
F Makefile.in e2218eb228374422969de7b1680eda6864affcef
F Makefile.linux-gcc 91d710bdc4998cb015f39edf3cb314ec4f4d7e23
@ -105,19 +105,19 @@ F ext/fts3/unicode/UnicodeData.txt cd07314edb62d49fde34debdaf92fa2aa69011e7
F ext/fts3/unicode/mkunicode.tcl 95cf7ec186e48d4985e433ff8a1c89090a774252
F ext/fts3/unicode/parseunicode.tcl da577d1384810fb4e2b209bf3313074353193e95
F ext/fts5/extract_api_docs.tcl 06583c935f89075ea0b32f85efa5dd7619fcbd03
F ext/fts5/fts5.h b9dfb487ada3caab4400210609b8309b71a4fb4d
F ext/fts5/fts5Int.h b0cfe44ec9451f766b77c4e5f771e7919c6dc8d5
F ext/fts5/fts5_aux.c 7d0e275ee94ad7afdd4208d6b071b4319e8f9ca0
F ext/fts5/fts5.h 0784692f406588e6c90e13a78e1f36e7e3236e42
F ext/fts5/fts5Int.h 9fd31e682acae32806f77e7c3b543c4294274c92
F ext/fts5/fts5_aux.c 7a307760a9c57c750d043188ec0bad59f5b5ec7e
F ext/fts5/fts5_buffer.c 80f9ba4431848cb857e3d2158f5280093dcd8015
F ext/fts5/fts5_config.c ab81c8ccff6c0fb79f21c369e18e8e0dec365ec5
F ext/fts5/fts5_expr.c f53917b6e68dee62e4c525466edacacf82eb7cbc
F ext/fts5/fts5_config.c 80b61fd2c6844b64a3e72a64572d50a812da9384
F ext/fts5/fts5_expr.c 7ea46f676491989069d31ae1f75c9439b0858711
F ext/fts5/fts5_hash.c 4bf4b99708848357b8a2b5819e509eb6d3df9246
F ext/fts5/fts5_index.c 076c4995bf06a6d1559a6e31f9a86b90f2105374
F ext/fts5/fts5_main.c 7afdb84ac40b0e5bbb920a07a5cd5e062963816c
F ext/fts5/fts5_storage.c 9c263323479a4aa554738e421813cd05615d379c
F ext/fts5/fts5_tcl.c 41e2d6b455547a157085fd35fd59d4fd890dc7d3
F ext/fts5/fts5_main.c b00834ac543431dc35edbe18018b4befe0c7fd42
F ext/fts5/fts5_storage.c 9820e7b53ea12baf3c818485efd66346b73030c3
F ext/fts5/fts5_tcl.c 058f8da51964458e9859edfc1ee13b1863edaeae
F ext/fts5/fts5_test_mi.c 80a9e86fb4c5b6b58f8fefac05e9b96d1a6574e1
F ext/fts5/fts5_tokenize.c 07a894410bc074685ddc0a9d89b5e7bf57ea4482
F ext/fts5/fts5_tokenize.c 710541513ecf3fe6d9365326fc85aee6efe97229
F ext/fts5/fts5_unicode2.c 78273fbd588d1d9bd0a7e4e0ccc9207348bae33c
F ext/fts5/fts5_varint.c 3f86ce09cab152e3d45490d7586b7ed2e40c13f1
F ext/fts5/fts5_vocab.c 4622e0b7d84a488a1585aaa56eb214ee67a988bc
@ -1380,10 +1380,7 @@ F tool/vdbe_profile.tcl 67746953071a9f8f2f668b73fe899074e2c6d8c1
F tool/warnings-clang.sh f6aa929dc20ef1f856af04a730772f59283631d4
F tool/warnings.sh 48bd54594752d5be3337f12c72f28d2080cb630b
F tool/win/sqlite.vsix deb315d026cc8400325c5863eef847784a219a2f
P 0fdc36fe35ae2fc8e9688fe6c53437f4d47502d9
R 694c0e23ba08ed9bcc32d2c502ed8f13
T *branch * fts5-incompatible
T *sym-fts5-incompatible *
T -sym-trunk *
P 90b85b42f2b2dd3e939b129b7df2b822a05e243d
R f343432805e01f14633e088d58d566cf
U dan
Z 745a50831400d199b74f44c2476ec260
Z e4288542e4294b868813263b0597051d

View File

@ -1 +1 @@
90b85b42f2b2dd3e939b129b7df2b822a05e243d
fc71868496f45f9c7a79ed2bf2d164a7c4718ce1